## Loading required package: Matrix
##
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
##
## abbreviate, write
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:arules':
##
## intersect, recode, setdiff, setequal, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Attaching package: 'tidyr'
## The following objects are masked from 'package:Matrix':
##
## expand, pack, unpack
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:DescTools':
##
## Recode
## The following object is masked from 'package:arules':
##
## recode
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:arules':
##
## intersect, setdiff, union
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Attaching package: 'forecast'
## The following object is masked from 'package:DescTools':
##
## BoxCox
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following objects are masked from 'package:DescTools':
##
## MAE, RMSE
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'astsa'
## The following object is masked from 'package:forecast':
##
## gas
## The following object is masked from 'package:DescTools':
##
## %^%
Part 1: First look
cancer_incidence<- read.csv(file="C:/Users/olivi/OneDrive/Documents/DataSets/13100109-eng/13100109.csv", sep = ",")
From the cancer_incidence file arbitrary features are removed first then rows with about characteristics that aren’t what is being analyzed.
Basic preliminary before moving to pandas-profiling
cancer_2<-cancer_table[-grep("Statistically different from", cancer_table$Characteristics),]
cancer_2<-cancer_2[-grep("95%", cancer_2$Characteristics),]
cancer_2<-cancer_2[grep("age-standardized", cancer_2$Characteristics),]
cancer_2<-cancer_2[-grep("Canada", cancer_2$GEO),]
cancer_2<-cancer_2[-grep("Peer", cancer_2$GEO),]
cancer_2<- cancer_2[!duplicated(cancer_2),]
cancer_2.1<- spread(cancer_2, Characteristics, VALUE)
cancer_2.2<- cancer_2.1[,1:5]
cancer_2.2<- spread(cancer_2.2, Selected.sites.of.cancer..ICD.O.3., 'Cancer incidence (age-standardized rate per 100,000 population)')
cancer_2.2<-as.data.frame(unclass(cancer_2.2), stringsAsFactors = TRUE)
#levels(cancer_2.2$GEO)
Part 2: preliminary analysis
hist(cancer_2.2[,4], breaks = 50, main = "All cancer sites")
hist(cancer_2.2[,5], breaks = 50, main = "Bronchus and lung cancer")
hist(cancer_2.2[,6], breaks = 50, main = "Colon cancer")
hist(cancer_2.2[,7], breaks = 50, main = "Female breast cancer")
hist(cancer_2.2[,8], breaks = 25, main = "Prostate cancer")
Part 3: Stratify data
##continuous
cancer_ON.2<- cancer_2.2[grep("Ontario", cancer_2.2$GEO),]
cancer_QC.2<- cancer_2.2[grep("Quebec", cancer_2.2$GEO),]
cancer_BC.2<- cancer_2.2[grep("British Columbia", cancer_2.2$GEO),]
cancer_MB.2<- cancer_2.2[grep("Manitoba", cancer_2.2$GEO),]
cancer_SK.2<- cancer_2.2[grep("Saskatchewan", cancer_2.2$GEO),]
cancer_AB.2<- cancer_2.2[grep("Alberta", cancer_2.2$GEO),]
cancer_midwest.2<- rbind(cancer_MB.2,cancer_AB.2,cancer_SK.2)
cancer_NL.2<- cancer_2.2[grep("Newfoundland", cancer_2.2$GEO),]
cancer_NB.2<- cancer_2.2[grep("New Brunswick", cancer_2.2$GEO),]
cancer_NS.2<- cancer_2.2[grep("Nova Scotia", cancer_2.2$GEO),]
cancer_PEI.2<- cancer_2.2[grep("Prince Edward Island",cancer_2.2$GEO),]
cancer_maritimes.2<- rbind(cancer_NS.2,cancer_NB.2,cancer_NL.2,cancer_PEI.2)
cancer_YT.2<- cancer_2.2[grep("Yukon", cancer_2.2$GEO),]
cancer_NWT.2<-cancer_2.2[grep("Northwest Territories", cancer_2.2$GEO),]
cancer_NVT.2<-cancer_2.2[grep("Nunavut", cancer_2.2$GEO),]
cancer_territories.2<- rbind(cancer_NWT.2,cancer_YT.2,cancer_NVT.2)
Part 4: Preliminary time-series analysis
##try time series analysis
###Just a peek
ts_ON.2<-ts(data = cancer_ON.2, frequency = n_distinct(cancer_ON.2$GEO) ,start = 2001, end = 2015)
ts.plot(ts_ON.2)
ts_QC.2<-ts(data = cancer_QC.2, frequency = n_distinct(cancer_QC.2$GEO) ,start = 2001, end = 2015)
ts.plot(ts_QC.2)
ts_BC.2<-ts(data = cancer_BC.2, frequency = n_distinct(cancer_BC.2$GEO) ,start = 2001, end = 2015)
ts.plot(ts_BC.2)
ts_midwest.2<-ts(data = cancer_midwest.2, frequency = n_distinct(cancer_midwest.2$GEO) ,start = 2001, end = 2015)
ts.plot(ts_midwest.2)
ts_maritimes.2<-ts(data = cancer_maritimes.2,frequency = n_distinct(cancer_maritimes.2$GEO) , start = 2001, end = 2015)
ts.plot(ts_maritimes.2)
ts_territories.2<-ts(data = cancer_territories.2, frequency = n_distinct(cancer_territories.2$GEO) ,start = 2001, end = 2015)
ts.plot(ts_territories.2)
####SO the frequency used is the number of distinct values in the GEO feature.
####This is because this is the "interval" that repeats over time.
##"Argument frequency indicates the sampling frequency of the time series, with the default value 1 indicating one sample in each unit time interval."
Note that decomposition plots are in order ON, QC, BC, midwest, maritimes, territories and each group has a decomp plot for all cancer site, lung cancer and colon cancer, respectively.
plot(decompose(ts_ON.2[,4]))
plot(decompose(ts_ON.2[,5]))
plot(decompose(ts_ON.2[,6]))
plot(decompose(ts_QC.2[,4]))
plot(decompose(ts_QC.2[,5]))#$trend)
plot(decompose(ts_QC.2[,6]))
plot(decompose(ts_BC.2[,4]))
plot(decompose(ts_BC.2[,5]))
plot(decompose(ts_BC.2[,6]))
plot(decompose(ts_midwest.2[,4]))##residuals are not white noise
plot(decompose(ts_midwest.2[,5]))##residuals are not white noise
plot(decompose(ts_midwest.2[,6]))##residuals are not white noise
plot(decompose(ts_maritimes.2[,4]))
plot(decompose(ts_maritimes.2[,5]))##residuals are not white noise
plot(decompose(ts_maritimes.2[,6]))
plot(decompose(ts_territories.2[,4]))
plot(decompose(ts_territories.2[,5]))
plot(decompose(ts_territories.2[,6]))
ggplot(ts_ON.2[,4], aes(ts_ON.2[,1], ts_ON.2[,4]))+
geom_smooth(method="loess")+
xlab("Time Periods")+
ylab("Cancer Incidence")+
ggtitle("ON Smoothed Time-series - All Invasive Cancer")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'
ggplot(ts_ON.2[,5], aes(ts_ON.2[,1], ts_ON.2[,5]))+
geom_smooth(method="loess")+
xlab("Time Periods")+
ylab("Cancer Incidence")+
ggtitle("ON Smoothed Time-series - Bronchus and Lung")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'
ggplot(ts_ON.2[,6], aes(ts_ON.2[,1], ts_ON.2[,6]))+
geom_smooth(method="loess")+
xlab("Time Periods")+
ylab("Cancer Incidence")+
ggtitle("ON Smoothed Time-series - Colon and Rectum")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'
ggplot(ts_QC.2[,4], aes(ts_QC.2[,1], ts_QC.2[,4]))+
geom_smooth(method="loess")+
xlab("Time Periods")+
ylab("Cancer Incidence")+
ggtitle("QC Smoothed Time-series - All Invasive Cancer")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'
ggplot(ts_QC.2[,5], aes(ts_QC.2[,1], ts_QC.2[,5]))+
geom_smooth(method="loess")+
xlab("Time Periods")+
ylab("Cancer Incidence")+
ggtitle("QC Smoothed Time-series - Bronchus and Lung")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'
ggplot(ts_QC.2[,6], aes(ts_QC.2[,1], ts_QC.2[,6]))+
geom_smooth(method="loess")+
xlab("Time Periods")+
ylab("Cancer Incidence")+
ggtitle("QC Smoothed Time-series - Colon and Rectum")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'
ggplot(ts_BC.2[,4], aes(ts_BC.2[,1], ts_BC.2[,4]))+
geom_smooth(method="loess")+
xlab("Time Periods")+
ylab("Cancer Incidence")+
ggtitle("BC Smoothed Time-series - All Invasive Cancer")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'
ggplot(ts_BC.2[,5], aes(ts_BC.2[,1], ts_BC.2[,5]))+
geom_smooth(method="loess")+
xlab("Time Periods")+
ylab("Cancer Incidence")+
ggtitle("BC Smoothed Time-series - Bronchus and Lung")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'
ggplot(ts_BC.2[,6], aes(ts_BC.2[,1], ts_BC.2[,6]))+
geom_smooth(method="loess")+
xlab("Time Periods")+
ylab("Cancer Incidence")+
ggtitle("BC Smoothed Time-series - Colon and Rectum")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'
ggplot(ts_midwest.2[,4], aes(ts_midwest.2[,1], ts_midwest.2[,4]))+
geom_smooth(method="loess")+
xlab("2000 - 2015")+
ylab("Cancer Incidence")+
ggtitle("Midwest Smoothed Time-series - All Invasive Cancer")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'
ggplot(ts_midwest.2[,5], aes(ts_midwest.2[,1], ts_midwest.2[,5]))+
geom_smooth(method="loess")+
xlab("2000 - 2015")+
ylab("Cancer Incidence")+
ggtitle("Midwest Smoothed Time-series - Bronchus and Lung")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'
ggplot(ts_midwest.2[,6], aes(ts_midwest.2[,1], ts_midwest.2[,6]))+
geom_smooth(method="loess")+
xlab("2000 - 2015")+
ylab("Cancer Incidence")+
ggtitle("Midwest Smoothed Time-series - Colon and Rectum")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'
ggplot(ts_maritimes.2[,4], aes(ts_maritimes.2[,1], ts_maritimes.2[,4]))+
geom_smooth(method="loess")+
xlab("2000 - 2015")+
ylab("Cancer Incidence")+
ggtitle("Maritimes Smoothed Time-series - All Invasive Cancer")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'
ggplot(ts_maritimes.2[,5], aes(ts_maritimes.2[,1], ts_maritimes.2[,5]))+
geom_smooth(method="loess")+
xlab("2000 - 2015")+
ylab("Cancer Incidence")+
ggtitle("Maritimes Smoothed Time-series - Bronchus and Lung")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'
ggplot(ts_maritimes.2[,6], aes(ts_maritimes.2[,1], ts_maritimes.2[,6]))+
geom_smooth(method="loess")+
xlab("2000 - 2015")+
ylab("Cancer Incidence")+
ggtitle("Maritimes Smoothed Time-series - Colon and Rectum")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'
ggplot(ts_territories.2[,4], aes(ts_territories.2[,1], ts_territories.2[,4]))+
geom_smooth(method="loess")+
xlab("2000 - 2015")+
ylab("Cancer Incidence")+
ggtitle("Territories Smoothed Time-series - All Invasive Cancer")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'
ggplot(ts_territories.2[,5], aes(ts_territories.2[,1], ts_territories.2[,5]))+
geom_smooth(method="loess")+
xlab("2000 - 2015")+
ylab("Cancer Incidence")+
ggtitle("Territories Smoothed Time-series - Bronchus and Lung")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'
ggplot(ts_territories.2[,6], aes(ts_territories.2[,1], ts_territories.2[,6]))+
geom_smooth(method="loess")+
xlab("2000 - 2015")+
ylab("Cancer Incidence")+
ggtitle("Territories Smoothed Time-series - Colon and Rectum")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'
Part 4B: Differencing
plot(ts_ON.dif.4<-diff(ts_ON.2[,4]))
plot(ts_ON.dif.5<-diff(ts_ON.2[,5]))
plot(ts_ON.dif.6<-diff(ts_ON.2[,6]))
plot(ts_QC.dif.4<-diff(ts_QC.2[,4]))
plot(ts_QC.dif.5<-diff(ts_QC.2[,5]))
plot(ts_QC.dif.6<-diff(ts_QC.2[,6]))
plot(ts_BC.dif.4<-diff(ts_BC.2[,4]))
plot(ts_BC.dif.5<-diff(ts_BC.2[,5]))
plot(ts_BC.dif.6<-diff(ts_BC.2[,6]))
plot(ts_mid.dif.4<-diff(ts_midwest.2[,4]))
plot(ts_mid.dif.5<-diff(ts_midwest.2[,5]))
plot(ts_mid.dif.6<-diff(ts_midwest.2[,6]))
plot(ts_mar.dif.4<-diff(ts_maritimes.2[,4]))
plot(ts_mar.dif.5<-diff(ts_maritimes.2[,5]))
plot(ts_mar.dif.6<-diff(ts_maritimes.2[,6]))
plot(ts_ter.dif.4<-diff(ts_territories.2[,4]))
plot(ts_ter.dif.5<-diff(ts_territories.2[,5]))
plot(ts_ter.dif.6<-diff(ts_territories.2[,6]))
Part 5: Stationary vs Non-stationary
##tests for stationarity
#ljung-box - non-stationary will have low p-value
##other tests indicated a lag of 6 so we will see how this changes the ljung-box test
Box.test(ts_ON.2[,4], type="Ljung-Box")
##
## Box-Ljung test
##
## data: ts_ON.2[, 4]
## X-squared = 41.804, df = 1, p-value = 1.009e-10
Box.test(ts_ON.2[,5], type="Ljung-Box")
##
## Box-Ljung test
##
## data: ts_ON.2[, 5]
## X-squared = 20.283, df = 1, p-value = 6.678e-06
Box.test(ts_ON.2[,6], type="Ljung-Box")
##
## Box-Ljung test
##
## data: ts_ON.2[, 6]
## X-squared = 11.866, df = 1, p-value = 0.0005718
Box.test(ts_QC.2[,4], type="Ljung-Box")##fail to reject null of stationarity
##
## Box-Ljung test
##
## data: ts_QC.2[, 4]
## X-squared = 3.5323, df = 1, p-value = 0.06018
Box.test(ts_QC.2[,5], type="Ljung-Box")
##
## Box-Ljung test
##
## data: ts_QC.2[, 5]
## X-squared = 31.127, df = 1, p-value = 2.417e-08
Box.test(ts_QC.2[,6], type="Ljung-Box")
##
## Box-Ljung test
##
## data: ts_QC.2[, 6]
## X-squared = 31.829, df = 1, p-value = 1.684e-08
Box.test(ts_BC.2[,4], type="Ljung-Box")
##
## Box-Ljung test
##
## data: ts_BC.2[, 4]
## X-squared = 14.29, df = 1, p-value = 0.0001567
Box.test(ts_BC.2[,5], type="Ljung-Box")
##
## Box-Ljung test
##
## data: ts_BC.2[, 5]
## X-squared = 6.0415, df = 1, p-value = 0.01397
Box.test(ts_BC.2[,6], type="Ljung-Box")
##
## Box-Ljung test
##
## data: ts_BC.2[, 6]
## X-squared = 7.7952, df = 1, p-value = 0.005239
Box.test(ts_midwest.2[,4], type="Ljung-Box")
##
## Box-Ljung test
##
## data: ts_midwest.2[, 4]
## X-squared = 32.01, df = 1, p-value = 1.534e-08
Box.test(ts_midwest.2[,5], type="Ljung-Box")
##
## Box-Ljung test
##
## data: ts_midwest.2[, 5]
## X-squared = 13.422, df = 1, p-value = 0.0002486
Box.test(ts_midwest.2[,6], type="Ljung-Box")##fail to reject null of stationarity
##
## Box-Ljung test
##
## data: ts_midwest.2[, 6]
## X-squared = 2.085, df = 1, p-value = 0.1487
Box.test(ts_maritimes.2[,4], type="Ljung-Box")
##
## Box-Ljung test
##
## data: ts_maritimes.2[, 4]
## X-squared = 34.233, df = 1, p-value = 4.889e-09
Box.test(ts_maritimes.2[,5], type="Ljung-Box")
##
## Box-Ljung test
##
## data: ts_maritimes.2[, 5]
## X-squared = 41.948, df = 1, p-value = 9.374e-11
Box.test(ts_maritimes.2[,6], type="Ljung-Box")
##
## Box-Ljung test
##
## data: ts_maritimes.2[, 6]
## X-squared = 8.2406, df = 1, p-value = 0.004096
Box.test(ts_territories.2[,4], type="Ljung-Box")##fail to reject null of stationarity
##
## Box-Ljung test
##
## data: ts_territories.2[, 4]
## X-squared = 0.19385, df = 1, p-value = 0.6597
Box.test(ts_territories.2[,5], type="Ljung-Box")##fail to reject null of stationarity
##
## Box-Ljung test
##
## data: ts_territories.2[, 5]
## X-squared = 1.9755, df = 1, p-value = 0.1599
Box.test(ts_territories.2[,6], type="Ljung-Box")##fail to reject null of stationarity
##
## Box-Ljung test
##
## data: ts_territories.2[, 6]
## X-squared = 8.0948e-06, df = 1, p-value = 0.9977
#generally reject the null that these are stationary time series
#stationary might indicate that there is an equally likely
#chance of cancer diagnosis regardless of location
#Augmented dickey-fuller t-stat test for unit root
options(warn = -1)
#non-stationary has a large p-value
adf.test(ts_ON.2[,4])
##
## Augmented Dickey-Fuller Test
##
## data: ts_ON.2[, 4]
## Dickey-Fuller = -8.561, Lag order = 8, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_ON.2[,5])
##
## Augmented Dickey-Fuller Test
##
## data: ts_ON.2[, 5]
## Dickey-Fuller = -7.9376, Lag order = 8, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_ON.2[,6])
##
## Augmented Dickey-Fuller Test
##
## data: ts_ON.2[, 6]
## Dickey-Fuller = -7.7782, Lag order = 8, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_QC.2[,4])
##
## Augmented Dickey-Fuller Test
##
## data: ts_QC.2[, 4]
## Dickey-Fuller = -6.9374, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_QC.2[,5])
##
## Augmented Dickey-Fuller Test
##
## data: ts_QC.2[, 5]
## Dickey-Fuller = -7.5649, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_QC.2[,6])
##
## Augmented Dickey-Fuller Test
##
## data: ts_QC.2[, 6]
## Dickey-Fuller = -4.9935, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_BC.2[,4])
##
## Augmented Dickey-Fuller Test
##
## data: ts_BC.2[, 4]
## Dickey-Fuller = -5.5942, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_BC.2[,5])
##
## Augmented Dickey-Fuller Test
##
## data: ts_BC.2[, 5]
## Dickey-Fuller = -5.1055, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_BC.2[,6])
##
## Augmented Dickey-Fuller Test
##
## data: ts_BC.2[, 6]
## Dickey-Fuller = -5.318, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_midwest.2[,4])
##
## Augmented Dickey-Fuller Test
##
## data: ts_midwest.2[, 4]
## Dickey-Fuller = -7.0176, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_midwest.2[,5])
##
## Augmented Dickey-Fuller Test
##
## data: ts_midwest.2[, 5]
## Dickey-Fuller = -7.0317, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_midwest.2[,6])
##
## Augmented Dickey-Fuller Test
##
## data: ts_midwest.2[, 6]
## Dickey-Fuller = -6.2022, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_maritimes.2[,4])
##
## Augmented Dickey-Fuller Test
##
## data: ts_maritimes.2[, 4]
## Dickey-Fuller = -5.9679, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_maritimes.2[,5])
##
## Augmented Dickey-Fuller Test
##
## data: ts_maritimes.2[, 5]
## Dickey-Fuller = -6.2437, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_maritimes.2[,6])
##
## Augmented Dickey-Fuller Test
##
## data: ts_maritimes.2[, 6]
## Dickey-Fuller = -6.7881, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
adf.test(ts_territories.2[,4])##fail to reject null of non-stationarity
##
## Augmented Dickey-Fuller Test
##
## data: ts_territories.2[, 4]
## Dickey-Fuller = -2.4322, Lag order = 3, p-value = 0.403
## alternative hypothesis: stationary
adf.test(ts_territories.2[,5])##fail to reject null of non-stationarity
##
## Augmented Dickey-Fuller Test
##
## data: ts_territories.2[, 5]
## Dickey-Fuller = -2.6982, Lag order = 3, p-value = 0.2979
## alternative hypothesis: stationary
adf.test(ts_territories.2[,6])##fail to reject null of non-stationarity
##
## Augmented Dickey-Fuller Test
##
## data: ts_territories.2[, 6]
## Dickey-Fuller = -2.1182, Lag order = 3, p-value = 0.527
## alternative hypothesis: stationary
##reject the null of non-stationarity for all except ^^
##KPSS for level or trend stationarity
options(warn = -1)
## lower p-value indicates non stationarity
kpss.test(ts_ON.2[,4], null = "Trend")
##
## KPSS Test for Trend Stationarity
##
## data: ts_ON.2[, 4]
## KPSS Trend = 0.03125, Truncation lag parameter = 6, p-value = 0.1
kpss.test(ts_ON.2[,5], null = "Trend")
##
## KPSS Test for Trend Stationarity
##
## data: ts_ON.2[, 5]
## KPSS Trend = 0.02115, Truncation lag parameter = 6, p-value = 0.1
kpss.test(ts_ON.2[,6], null = "Trend")
##
## KPSS Test for Trend Stationarity
##
## data: ts_ON.2[, 6]
## KPSS Trend = 0.045779, Truncation lag parameter = 6, p-value = 0.1
kpss.test(ts_QC.2[,4], null = "Trend")
##
## KPSS Test for Trend Stationarity
##
## data: ts_QC.2[, 4]
## KPSS Trend = 0.027699, Truncation lag parameter = 5, p-value = 0.1
kpss.test(ts_QC.2[,5], null = "Trend")
##
## KPSS Test for Trend Stationarity
##
## data: ts_QC.2[, 5]
## KPSS Trend = 0.030488, Truncation lag parameter = 5, p-value = 0.1
kpss.test(ts_QC.2[,6], null = "Trend")
##
## KPSS Test for Trend Stationarity
##
## data: ts_QC.2[, 6]
## KPSS Trend = 0.043577, Truncation lag parameter = 5, p-value = 0.1
kpss.test(ts_BC.2[,4], null = "Trend")
##
## KPSS Test for Trend Stationarity
##
## data: ts_BC.2[, 4]
## KPSS Trend = 0.094938, Truncation lag parameter = 4, p-value = 0.1
kpss.test(ts_BC.2[,5], null = "Trend")
##
## KPSS Test for Trend Stationarity
##
## data: ts_BC.2[, 5]
## KPSS Trend = 0.033942, Truncation lag parameter = 4, p-value = 0.1
kpss.test(ts_BC.2[,6], null = "Trend")
##
## KPSS Test for Trend Stationarity
##
## data: ts_BC.2[, 6]
## KPSS Trend = 0.04278, Truncation lag parameter = 4, p-value = 0.1
kpss.test(ts_midwest.2[,4], null = "Trend")##reject the null that this is level stationary
##
## KPSS Test for Trend Stationarity
##
## data: ts_midwest.2[, 4]
## KPSS Trend = 0.096673, Truncation lag parameter = 5, p-value = 0.1
kpss.test(ts_midwest.2[,5], null = "Trend")##reject the null that this is level stationary
##
## KPSS Test for Trend Stationarity
##
## data: ts_midwest.2[, 5]
## KPSS Trend = 0.048228, Truncation lag parameter = 5, p-value = 0.1
kpss.test(ts_midwest.2[,6], null = "Trend")##reject the null that this is level/trend stationary
##
## KPSS Test for Trend Stationarity
##
## data: ts_midwest.2[, 6]
## KPSS Trend = 0.3926, Truncation lag parameter = 5, p-value = 0.01
kpss.test(ts_maritimes.2[,4], null = "Trend")##reject the null that this is level/trend stationary
##
## KPSS Test for Trend Stationarity
##
## data: ts_maritimes.2[, 4]
## KPSS Trend = 0.16538, Truncation lag parameter = 5, p-value = 0.03385
kpss.test(ts_maritimes.2[,5], null = "Trend")##reject the null that this is trend stationary
##
## KPSS Test for Trend Stationarity
##
## data: ts_maritimes.2[, 5]
## KPSS Trend = 0.3217, Truncation lag parameter = 5, p-value = 0.01
kpss.test(ts_maritimes.2[,6], null = "Trend")##reject the null that this is level/trend stationary
##
## KPSS Test for Trend Stationarity
##
## data: ts_maritimes.2[, 6]
## KPSS Trend = 0.1833, Truncation lag parameter = 5, p-value = 0.02226
kpss.test(ts_territories.2[,4], null = "Trend")
##
## KPSS Test for Trend Stationarity
##
## data: ts_territories.2[, 4]
## KPSS Trend = 0.097352, Truncation lag parameter = 3, p-value = 0.1
kpss.test(ts_territories.2[,5], null = "Trend")
##
## KPSS Test for Trend Stationarity
##
## data: ts_territories.2[, 5]
## KPSS Trend = 0.075875, Truncation lag parameter = 3, p-value = 0.1
kpss.test(ts_territories.2[,6], null = "Trend")
##
## KPSS Test for Trend Stationarity
##
## data: ts_territories.2[, 6]
## KPSS Trend = 0.065017, Truncation lag parameter = 3, p-value = 0.1
##cannot reject the null of stationarity, p-value indicates they could be stationary
Part 6: test/train split
split.ON<-ts_split(ts.obj=ts_ON.2, sample.out =104)
train.ON<- split.ON$train
test.ON<- split.ON$test
split.QC<- ts_split(ts_QC.2, sample.out = 38)
train.QC<- split.QC$train
test.QC<- split.QC$test
split.BC<- ts_split(ts_BC.2, sample.out = 34)
train.BC<- split.BC$train
test.BC<- split.BC$test
split.mid<- ts_split(ts_midwest.2, sample.out =48)
train.mid<- split.mid$train
test.mid<- split.mid$test
split.mar<- ts_split(ts_maritimes.2, sample.out = 38)
train.mar<- split.mar$train
test.mar<- split.mar$test
split.terr<- ts_split(ts_territories.2, sample.out = 6)
train.terr<-split.terr$train
test.terr<-split.terr$test
Part 7: TSLM
#TSLM
#need to train models
##time series regression [tsr]-> tslm
tsr_ON<- tslm(formula= train.ON[,4]~trend+season,data=train.ON)
summary(tsr_ON)
##
## Call:
## tslm(formula = train.ON[, 4] ~ trend + season, data = train.ON)
##
## Residuals:
## Min 1Q Median 3Q Max
## -164.354 -55.871 -3.862 57.829 181.937
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 531.961026 20.159882 26.387 <2e-16 ***
## trend 0.020031 0.015681 1.277 0.2020
## season2 -32.846658 28.225422 -1.164 0.2450
## season3 -28.383355 28.225208 -1.006 0.3150
## season4 -42.603387 28.225004 -1.509 0.1317
## season5 -13.215084 28.224807 -0.468 0.6398
## season6 -23.060116 28.224620 -0.817 0.4143
## season7 -30.746813 28.224442 -1.089 0.2765
## season8 -30.383511 28.224272 -1.077 0.2822
## season9 -14.395209 28.224110 -0.510 0.6102
## season10 -47.040240 28.223958 -1.667 0.0961 .
## season11 -30.451938 28.223814 -1.079 0.2811
## season12 -55.113636 28.223679 -1.953 0.0513 .
## season13 -0.775333 28.223553 -0.027 0.9781
## season14 -21.878698 28.223435 -0.775 0.4385
## season15 0.601271 28.223326 0.021 0.9830
## season16 -1.993760 28.223226 -0.071 0.9437
## season17 6.127875 28.223135 0.217 0.8282
## season18 11.866178 28.223052 0.420 0.6743
## season19 -13.712187 28.222978 -0.486 0.6273
## season20 -6.515551 28.222912 -0.231 0.8175
## season21 -4.735582 28.222856 -0.168 0.8668
## season22 -23.030614 28.222808 -0.816 0.4148
## season23 1.674355 28.222769 0.059 0.9527
## season24 -23.137343 28.222738 -0.820 0.4127
## season25 -15.915707 28.222716 -0.564 0.5730
## season26 -10.969071 28.222703 -0.389 0.6977
## season27 -23.680769 28.222699 -0.839 0.4018
## season28 0.949200 28.222703 0.034 0.9732
## season29 3.579169 28.222716 0.127 0.8991
## season30 0.009137 28.222738 0.000 0.9997
## season31 31.447440 28.222769 1.114 0.2656
## season32 32.427408 28.222808 1.149 0.2510
## season33 12.282377 28.222856 0.435 0.6636
## season34 35.812346 28.222912 1.269 0.2050
## season35 -17.307685 28.222978 -0.613 0.5400
## season36 -10.602716 28.223052 -0.376 0.7073
## season37 -24.697747 28.223135 -0.875 0.3819
## season38 -9.151112 28.223226 -0.324 0.7459
## season39 -17.687809 28.223326 -0.627 0.5311
## season40 -13.199507 28.223435 -0.468 0.6402
## season41 -14.361205 28.223553 -0.509 0.6111
## season42 -27.864570 28.223679 -0.987 0.3239
## season43 -30.684601 28.223814 -1.087 0.2774
## season44 -59.271298 28.223958 -2.100 0.0362 *
## season45 -12.157996 28.224110 -0.431 0.6668
## season46 -24.628027 28.224272 -0.873 0.3833
## season47 -18.639725 28.224442 -0.660 0.5093
## season48 -16.484756 28.224620 -0.584 0.5594
## season49 -8.354787 28.224807 -0.296 0.7673
## season50 -36.183152 28.225004 -1.282 0.2004
## season51 -16.544850 28.225208 -0.586 0.5580
## season52 -32.689881 28.225422 -1.158 0.2473
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 70.5 on 572 degrees of freedom
## Multiple R-squared: 0.07845, Adjusted R-squared: -0.005324
## F-statistic: 0.9365 on 52 and 572 DF, p-value: 0.6026
tsr_ON.5<- tslm(formula= train.ON[,5]~trend +season ,data=train.ON)
summary(tsr_ON.5)
##
## Call:
## tslm(formula = train.ON[, 5] ~ trend + season, data = train.ON)
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.887 -11.034 -0.478 9.948 89.766
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 67.171548 4.788426 14.028 < 2e-16 ***
## trend 0.002057 0.003725 0.552 0.580985
## season2 -6.305627 6.704174 -0.941 0.347331
## season3 -4.791017 6.704123 -0.715 0.475125
## season4 -6.134741 6.704074 -0.915 0.360537
## season5 -2.378464 6.704028 -0.355 0.722884
## season6 1.419479 6.703983 0.212 0.832388
## season7 -1.857578 6.703941 -0.277 0.781813
## season8 6.315365 6.703900 0.942 0.346568
## season9 -2.020026 6.703862 -0.301 0.763278
## season10 -7.230416 6.703826 -1.079 0.281243
## season11 -0.407473 6.703792 -0.061 0.951554
## season12 0.657137 6.703760 0.098 0.921947
## season13 16.138413 6.703730 2.407 0.016383 *
## season14 9.311356 6.703702 1.389 0.165377
## season15 13.659299 6.703676 2.038 0.042051 *
## season16 8.873909 6.703652 1.324 0.186117
## season17 10.188519 6.703630 1.520 0.129101
## season18 11.419795 6.703611 1.704 0.089012 .
## season19 6.126071 6.703593 0.914 0.361181
## season20 12.074014 6.703578 1.801 0.072209 .
## season21 4.971957 6.703564 0.742 0.458580
## season22 5.694900 6.703553 0.850 0.395939
## season23 0.417843 6.703544 0.062 0.950320
## season24 -3.200880 6.703536 -0.477 0.633195
## season25 -7.452937 6.703531 -1.112 0.266694
## season26 -2.229994 6.703528 -0.333 0.739513
## season27 3.959615 6.703527 0.591 0.554971
## season28 13.899225 6.703528 2.073 0.038580 *
## season29 11.105501 6.703531 1.657 0.098136 .
## season30 16.978444 6.703536 2.533 0.011583 *
## season31 13.226387 6.703544 1.973 0.048972 *
## season32 24.765997 6.703553 3.694 0.000242 ***
## season33 20.555607 6.703564 3.066 0.002269 **
## season34 34.878550 6.703578 5.203 2.74e-07 ***
## season35 11.584826 6.703593 1.728 0.084501 .
## season36 10.349436 6.703611 1.544 0.123175
## season37 2.764046 6.703630 0.412 0.680259
## season38 1.820322 6.703652 0.272 0.786072
## season39 1.518265 6.703676 0.226 0.820907
## season40 3.391208 6.703702 0.506 0.613142
## season41 5.105818 6.703730 0.762 0.446590
## season42 3.795427 6.703760 0.566 0.571504
## season43 -2.039963 6.703792 -0.304 0.761010
## season44 -0.875353 6.703826 -0.131 0.896157
## season45 -2.610744 6.703862 -0.389 0.697097
## season46 -1.054467 6.703900 -0.157 0.875071
## season47 4.776809 6.703941 0.713 0.476423
## season48 5.883085 6.703983 0.878 0.380556
## season49 8.889362 6.704028 1.326 0.185378
## season50 -1.462695 6.704074 -0.218 0.827367
## season51 -1.506419 6.704123 -0.225 0.822293
## season52 -12.800143 6.704174 -1.909 0.056726 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.75 on 572 degrees of freedom
## Multiple R-squared: 0.2277, Adjusted R-squared: 0.1575
## F-statistic: 3.243 on 52 and 572 DF, p-value: 4.909e-12
tsr_ON.6<- tslm(formula= train.ON[,6]~trend+season,data=train.ON)
summary(tsr_ON.6)
##
## Call:
## tslm(formula = train.ON[, 6] ~ trend + season, data = train.ON)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32.634 -9.496 -0.547 8.381 46.230
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 65.452219 3.788519 17.276 < 2e-16 ***
## trend 0.001627 0.002947 0.552 0.58104
## season2 -2.029191 5.304224 -0.383 0.70219
## season3 0.485848 5.304184 0.092 0.92705
## season4 -5.482446 5.304146 -1.034 0.30175
## season5 1.649260 5.304109 0.311 0.75596
## season6 1.005966 5.304074 0.190 0.84964
## season7 -2.462327 5.304040 -0.464 0.64266
## season8 -0.238955 5.304008 -0.045 0.96408
## season9 4.776085 5.303978 0.900 0.36825
## season10 -3.675542 5.303949 -0.693 0.48860
## season11 3.456164 5.303922 0.652 0.51491
## season12 -6.845464 5.303897 -1.291 0.19735
## season13 8.211243 5.303873 1.548 0.12214
## season14 6.234615 5.303851 1.175 0.24029
## season15 10.016321 5.303831 1.889 0.05946 .
## season16 9.764694 5.303812 1.841 0.06613 .
## season17 3.529734 5.303795 0.666 0.50599
## season18 5.978107 5.303779 1.127 0.26016
## season19 1.526479 5.303765 0.288 0.77360
## season20 1.683185 5.303753 0.317 0.75109
## season21 2.606558 5.303742 0.491 0.62329
## season22 0.521598 5.303733 0.098 0.92169
## season23 2.469970 5.303726 0.466 0.64160
## season24 1.401677 5.303720 0.264 0.79166
## season25 -1.633284 5.303716 -0.308 0.75823
## season26 2.081755 5.303713 0.393 0.69483
## season27 -0.278205 5.303713 -0.052 0.95818
## season28 4.170168 5.303713 0.786 0.43203
## season29 3.060207 5.303716 0.577 0.56417
## season30 5.541913 5.303720 1.045 0.29651
## season31 12.248619 5.303726 2.309 0.02127 *
## season32 13.455325 5.303733 2.537 0.01145 *
## season33 7.695365 5.303742 1.451 0.14735
## season34 17.185404 5.303753 3.240 0.00126 **
## season35 6.225444 5.303765 1.174 0.24097
## season36 0.007150 5.303779 0.001 0.99892
## season37 -2.869477 5.303795 -0.541 0.58870
## season38 1.345562 5.303812 0.254 0.79982
## season39 3.260602 5.303831 0.615 0.53896
## season40 1.700641 5.303851 0.321 0.74860
## season41 5.157347 5.303873 0.972 0.33128
## season42 5.322387 5.303897 1.003 0.31605
## season43 0.479093 5.303922 0.090 0.92806
## season44 2.460799 5.303949 0.464 0.64286
## season45 -0.707495 5.303978 -0.133 0.89393
## season46 -2.359122 5.304008 -0.445 0.65665
## season47 0.014250 5.304040 0.003 0.99786
## season48 -0.279043 5.304074 -0.053 0.95806
## season49 1.327663 5.304109 0.250 0.80244
## season50 -0.390631 5.304146 -0.074 0.94132
## season51 -0.667258 5.304184 -0.126 0.89994
## season52 1.122781 5.304224 0.212 0.83243
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.25 on 572 degrees of freedom
## Multiple R-squared: 0.1141, Adjusted R-squared: 0.03357
## F-statistic: 1.417 on 52 and 572 DF, p-value: 0.0329
tsr_QC<- tslm(formula = train.QC[,4]~trend+season, data = train.QC)
summary(tsr_QC)
##
## Call:
## tslm(formula = train.QC[, 4] ~ trend + season, data = train.QC)
##
## Residuals:
## Min 1Q Median 3Q Max
## -217.67 -75.29 -11.71 70.79 536.62
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 551.16989 31.61431 17.434 < 2e-16 ***
## trend -0.05646 0.10552 -0.535 0.59318
## season2 35.07650 42.14527 0.832 0.40620
## season3 45.79129 42.14315 1.087 0.27848
## season4 36.15608 42.14130 0.858 0.39189
## season5 7.31254 42.13972 0.174 0.86240
## season6 6.64400 42.13840 0.158 0.87487
## season7 -50.34953 42.13734 -1.195 0.23348
## season8 14.77359 42.13655 0.351 0.72623
## season9 11.63839 42.13602 0.276 0.78266
## season10 -0.98015 42.13575 -0.023 0.98146
## season11 25.90964 42.13575 0.615 0.53928
## season12 28.13277 42.13602 0.668 0.50508
## season13 72.87256 42.13655 1.729 0.08521 .
## season14 137.78736 42.13734 3.270 0.00126 **
## season15 214.11882 42.13840 5.081 8.3e-07 ***
## season16 36.40028 42.13972 0.864 0.38869
## season17 38.13174 42.14130 0.905 0.36658
## season18 37.24653 42.14315 0.884 0.37781
## season19 37.31966 42.14527 0.886 0.37690
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 105.3 on 209 degrees of freedom
## Multiple R-squared: 0.2268, Adjusted R-squared: 0.1565
## F-statistic: 3.227 on 19 and 209 DF, p-value: 1.698e-05
tsr_QC.5<- tslm(formula= train.QC[,5]~trend+season, data=train.QC)
summary(tsr_QC.5)
##
## Call:
## tslm(formula = train.QC[, 5] ~ trend + season, data = train.QC)
##
## Residuals:
## Min 1Q Median 3Q Max
## -111.157 -31.041 -1.991 24.030 239.457
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 94.67382 14.79562 6.399 1.01e-09 ***
## trend -0.03235 0.04939 -0.655 0.513
## season2 16.07120 19.72415 0.815 0.416
## season3 24.02855 19.72316 1.218 0.224
## season4 18.15256 19.72229 0.920 0.358
## season5 19.05158 19.72155 0.966 0.335
## season6 11.33392 19.72093 0.575 0.566
## season7 -12.12540 19.72044 -0.615 0.539
## season8 11.15695 19.72007 0.566 0.572
## season9 1.23097 19.71982 0.062 0.950
## season10 20.48831 19.71970 1.039 0.300
## season11 16.53733 19.71970 0.839 0.403
## season12 24.21968 19.71982 1.228 0.221
## season13 32.67702 19.72007 1.657 0.099 .
## season14 87.12604 19.72044 4.418 1.60e-05 ***
## season15 111.38338 19.72093 5.648 5.26e-08 ***
## season16 43.09073 19.72155 2.185 0.030 *
## season17 10.43141 19.72229 0.529 0.597
## season18 6.13876 19.72316 0.311 0.756
## season19 17.57944 19.72415 0.891 0.374
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 49.26 on 209 degrees of freedom
## Multiple R-squared: 0.2696, Adjusted R-squared: 0.2032
## F-statistic: 4.06 on 19 and 209 DF, p-value: 1.799e-07
tsr_QC.6<- tslm(formula= train.QC[,6]~trend+season, data=train.QC)
summary(tsr_QC.6)
##
## Call:
## tslm(formula = train.QC[, 6] ~ trend + season, data = train.QC)
##
## Residuals:
## Min 1Q Median 3Q Max
## -64.377 -15.152 -2.902 11.001 301.299
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 76.07255 11.76680 6.465 7.03e-10 ***
## trend -0.03949 0.03928 -1.006 0.31579
## season2 3.23353 15.68641 0.206 0.83689
## season3 3.39803 15.68563 0.217 0.82871
## season4 4.89585 15.68494 0.312 0.75525
## season5 -5.24798 15.68435 -0.335 0.73826
## season6 8.01651 15.68386 0.511 0.60980
## season7 -4.13566 15.68346 -0.264 0.79227
## season8 2.16216 15.68317 0.138 0.89048
## season9 2.34332 15.68297 0.149 0.88137
## season10 4.92448 15.68287 0.314 0.75383
## season11 21.43898 15.68287 1.367 0.17308
## season12 12.90347 15.68297 0.823 0.41158
## season13 49.96797 15.68317 3.186 0.00166 **
## season14 39.25746 15.68346 2.503 0.01308 *
## season15 37.24695 15.68386 2.375 0.01846 *
## season16 39.36145 15.68435 2.510 0.01285 *
## season17 8.16761 15.68494 0.521 0.60311
## season18 4.68210 15.68563 0.298 0.76562
## season19 6.92159 15.68641 0.441 0.65949
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 39.18 on 209 degrees of freedom
## Multiple R-squared: 0.1574, Adjusted R-squared: 0.08076
## F-statistic: 2.054 on 19 and 209 DF, p-value: 0.007473
tsr_BC<- tslm(formula = train.BC[,4]~trend+season, data = train.BC)
summary(tsr_BC)
##
## Call:
## tslm(formula = train.BC[, 4] ~ trend + season, data = train.BC)
##
## Residuals:
## Min 1Q Median 3Q Max
## -150.20 -50.08 -10.09 54.13 147.69
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 504.02295 20.04419 25.146 <2e-16 ***
## trend -0.09096 0.07831 -1.162 0.247
## season2 11.81394 26.49212 0.446 0.656
## season3 5.44657 26.49050 0.206 0.837
## season4 11.87086 26.48911 0.448 0.655
## season5 8.03682 26.48796 0.303 0.762
## season6 -16.64721 26.48703 -0.629 0.530
## season7 -35.35625 26.48634 -1.335 0.184
## season8 -10.25696 26.48587 -0.387 0.699
## season9 -3.52433 26.48564 -0.133 0.894
## season10 37.19997 26.48564 1.405 0.162
## season11 -1.90907 26.48587 -0.072 0.943
## season12 -1.98477 26.48634 -0.075 0.940
## season13 -4.14381 26.48703 -0.156 0.876
## season14 12.75548 26.48796 0.482 0.631
## season15 -14.42022 26.48911 -0.544 0.587
## season16 -7.41259 26.49050 -0.280 0.780
## season17 -14.47996 26.49212 -0.547 0.585
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 66.16 on 187 degrees of freedom
## Multiple R-squared: 0.06281, Adjusted R-squared: -0.02239
## F-statistic: 0.7373 on 17 and 187 DF, p-value: 0.7618
tsr_BC.5<- tslm(formula = train.BC[,5]~trend+season, data = train.BC)
summary(tsr_BC.5)
##
## Call:
## tslm(formula = train.BC[, 5] ~ trend + season, data = train.BC)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.243 -9.546 0.736 8.012 36.597
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 67.15930 3.86712 17.367 < 2e-16 ***
## trend -0.01529 0.01511 -1.012 0.31288
## season2 2.02572 5.11112 0.396 0.69231
## season3 4.10768 5.11080 0.804 0.42258
## season4 9.83130 5.11054 1.924 0.05591 .
## season5 2.26325 5.11031 0.443 0.65836
## season6 -3.42146 5.11013 -0.670 0.50397
## season7 -7.07284 5.11000 -1.384 0.16797
## season8 4.31745 5.10991 0.845 0.39924
## season9 6.68274 5.10987 1.308 0.19254
## season10 6.42303 5.10987 1.257 0.21033
## season11 1.80498 5.10991 0.353 0.72431
## season12 7.81194 5.11000 1.529 0.12801
## season13 16.29389 5.11013 3.189 0.00168 **
## season14 16.17585 5.11031 3.165 0.00181 **
## season15 7.56614 5.11054 1.480 0.14042
## season16 3.74809 5.11080 0.733 0.46425
## season17 -0.45329 5.11112 -0.089 0.92943
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.76 on 187 degrees of freedom
## Multiple R-squared: 0.1924, Adjusted R-squared: 0.119
## F-statistic: 2.62 on 17 and 187 DF, p-value: 0.0007868
tsr_BC.6<-tslm(formula = train.BC[,6]~trend+season, data=train.BC)
summary(tsr_BC.6)
##
## Call:
## tslm(formula = train.BC[, 6] ~ trend + season, data = train.BC)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.231 -7.982 -1.225 9.265 24.632
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 64.958208 3.470864 18.715 <2e-16 ***
## trend 0.007501 0.013560 0.553 0.581
## season2 1.350485 4.587392 0.294 0.769
## season3 -2.140349 4.587111 -0.467 0.641
## season4 -2.289516 4.586870 -0.499 0.618
## season5 -3.247017 4.586670 -0.708 0.480
## season6 -3.112850 4.586510 -0.679 0.498
## season7 -5.662018 4.586389 -1.235 0.219
## season8 -4.669518 4.586309 -1.018 0.310
## season9 1.189648 4.586269 0.259 0.796
## season10 -1.101186 4.586269 -0.240 0.811
## season11 -4.025353 4.586309 -0.878 0.381
## season12 -5.132854 4.586389 -1.119 0.265
## season13 -5.148688 4.586510 -1.123 0.263
## season14 -2.481189 4.586670 -0.541 0.589
## season15 -6.313689 4.586870 -1.376 0.170
## season16 -5.737856 4.587111 -1.251 0.213
## season17 -1.912024 4.587392 -0.417 0.677
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.46 on 187 degrees of freedom
## Multiple R-squared: 0.04368, Adjusted R-squared: -0.04326
## F-statistic: 0.5024 on 17 and 187 DF, p-value: 0.9497
tsr_mid<-tslm(formula = train.mid[,4]~trend+season, data=train.mid)
summary(tsr_mid)
##
## Call:
## tslm(formula = train.mid[, 4] ~ trend + season, data = train.mid)
##
## Residuals:
## Min 1Q Median 3Q Max
## -95.146 -15.649 2.233 17.974 75.647
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 537.30148 8.72014 61.616 < 2e-16 ***
## trend -0.05290 0.02086 -2.537 0.011773 *
## season2 -57.57935 11.80755 -4.876 1.87e-06 ***
## season3 72.70688 11.80716 6.158 2.73e-09 ***
## season4 -0.90688 11.80681 -0.077 0.938833
## season5 -45.44565 11.80650 -3.849 0.000149 ***
## season6 66.72392 11.80622 5.652 4.11e-08 ***
## season7 1.75182 11.80598 0.148 0.882153
## season8 -60.59528 11.80578 -5.133 5.55e-07 ***
## season9 80.69929 11.80561 6.836 5.64e-11 ***
## season10 0.06053 11.80548 0.005 0.995913
## season11 -48.81157 11.80539 -4.135 4.78e-05 ***
## season12 71.81633 11.80534 6.083 4.12e-09 ***
## season13 0.37756 11.80532 0.032 0.974510
## season14 -56.26953 11.80534 -4.766 3.10e-06 ***
## season15 73.47503 11.80539 6.224 1.90e-09 ***
## season16 -2.13873 11.80548 -0.181 0.856378
## season17 -46.93583 11.80561 -3.976 9.06e-05 ***
## season18 66.04207 11.80578 5.594 5.54e-08 ***
## season19 4.41164 11.80598 0.374 0.708944
## season20 -56.61879 11.80622 -4.796 2.71e-06 ***
## season21 80.90078 11.80650 6.852 5.11e-11 ***
## season22 -1.56299 11.80681 -0.132 0.894784
## season23 -47.24342 11.80716 -4.001 8.19e-05 ***
## season24 67.23448 11.80755 5.694 3.30e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 29.49 on 264 degrees of freedom
## Multiple R-squared: 0.7694, Adjusted R-squared: 0.7484
## F-statistic: 36.7 on 24 and 264 DF, p-value: < 2.2e-16
tsr_mid.5<-tslm(formula = train.mid[,5]~trend+season, data=train.mid)
summary(tsr_mid.5)
##
## Call:
## tslm(formula = train.mid[, 5] ~ trend + season, data = train.mid)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.284 -5.026 -0.936 2.799 56.624
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 86.013597 3.920819 21.938 < 2e-16 ***
## trend -0.061738 0.009377 -6.584 2.46e-10 ***
## season2 -9.598993 5.309006 -1.808 0.071735 .
## season3 12.079412 5.308832 2.275 0.023687 *
## season4 -2.392183 5.308675 -0.451 0.652635
## season5 -9.038778 5.308534 -1.703 0.089803 .
## season6 8.247960 5.308409 1.554 0.121441
## season7 4.368032 5.308302 0.823 0.411326
## season8 -7.253563 5.308211 -1.366 0.172951
## season9 19.349842 5.308136 3.645 0.000322 ***
## season10 -2.888420 5.308078 -0.544 0.586794
## season11 -8.693348 5.308037 -1.638 0.102661
## season12 6.701723 5.308012 1.263 0.207859
## season13 -1.253205 5.308004 -0.236 0.813540
## season14 -12.433133 5.308012 -2.342 0.019907 *
## season15 13.020272 5.308037 2.453 0.014817 *
## season16 -2.101323 5.308078 -0.396 0.692519
## season17 -8.222918 5.308136 -1.549 0.122552
## season18 7.838820 5.308211 1.477 0.140939
## season19 2.050558 5.308302 0.386 0.699591
## season20 -11.862703 5.308409 -2.235 0.026274 *
## season21 19.340702 5.308534 3.643 0.000324 ***
## season22 -2.805893 5.308675 -0.529 0.597563
## season23 -9.319155 5.308832 -1.755 0.080350 .
## season24 7.425917 5.309006 1.399 0.163065
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.26 on 264 degrees of freedom
## Multiple R-squared: 0.4133, Adjusted R-squared: 0.36
## F-statistic: 7.749 on 24 and 264 DF, p-value: < 2.2e-16
tsr_mid.6<- tslm(formula = train.mid[,6]~trend+season, data=train.mid)
summary(tsr_mid.6)
##
## Call:
## tslm(formula = train.mid[, 6] ~ trend + season, data = train.mid)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.571 -4.962 -1.073 3.943 38.074
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 76.024694 2.614624 29.077 < 2e-16 ***
## trend -0.036881 0.006253 -5.898 1.12e-08 ***
## season2 -9.307616 3.540346 -2.629 0.009065 **
## season3 10.770932 3.540230 3.042 0.002583 **
## season4 -1.058854 3.540125 -0.299 0.765099
## season5 -13.271973 3.540031 -3.749 0.000218 ***
## season6 13.964909 3.539948 3.945 0.000102 ***
## season7 2.135123 3.539876 0.603 0.546918
## season8 -9.586329 3.539815 -2.708 0.007208 **
## season9 15.058886 3.539766 4.254 2.92e-05 ***
## season10 -0.304233 3.539727 -0.086 0.931573
## season11 -13.825685 3.539699 -3.906 0.000119 ***
## season12 16.094529 3.539683 4.547 8.30e-06 ***
## season13 1.764744 3.539677 0.499 0.618504
## season14 -10.256709 3.539683 -2.898 0.004075 **
## season15 14.955173 3.539699 4.225 3.29e-05 ***
## season16 -1.682946 3.539727 -0.475 0.634863
## season17 -15.212732 3.539766 -4.298 2.43e-05 ***
## season18 14.915816 3.539815 4.214 3.45e-05 ***
## season19 3.744364 3.539876 1.058 0.291129
## season20 -9.060421 3.539948 -2.559 0.011041 *
## season21 17.909793 3.540031 5.059 7.89e-07 ***
## season22 -1.594992 3.540125 -0.451 0.652686
## season23 -13.933111 3.540230 -3.936 0.000106 ***
## season24 13.703770 3.540346 3.871 0.000137 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.842 on 264 degrees of freedom
## Multiple R-squared: 0.6453, Adjusted R-squared: 0.6131
## F-statistic: 20.01 on 24 and 264 DF, p-value: < 2.2e-16
tsr_mar<-tslm(formula = train.mar[,4]~trend+season, data= train.mar)
summary(tsr_mar)
##
## Call:
## tslm(formula = train.mar[, 4] ~ trend + season, data = train.mar)
##
## Residuals:
## Min 1Q Median 3Q Max
## -161.83 -59.24 -9.80 71.50 163.80
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 592.33249 24.12047 24.557 <2e-16 ***
## trend -0.23634 0.08051 -2.935 0.0037 **
## season2 13.83730 32.15518 0.430 0.6674
## season3 -2.63470 32.15357 -0.082 0.9348
## season4 14.80997 32.15216 0.461 0.6455
## season5 3.92964 32.15095 0.122 0.9028
## season6 8.90764 32.14994 0.277 0.7820
## season7 4.11064 32.14913 0.128 0.8984
## season8 8.43031 32.14853 0.262 0.7934
## season9 9.66665 32.14813 0.301 0.7639
## season10 13.20299 32.14792 0.411 0.6817
## season11 6.85599 32.14792 0.213 0.8313
## season12 10.00899 32.14813 0.311 0.7559
## season13 15.02866 32.14853 0.467 0.6406
## season14 17.58166 32.14913 0.547 0.5850
## season15 2.30967 32.14994 0.072 0.9428
## season16 3.27100 32.15095 0.102 0.9191
## season17 17.24900 32.15216 0.536 0.5922
## season18 -3.83133 32.15357 -0.119 0.9053
## season19 5.01334 32.15518 0.156 0.8763
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 80.31 on 209 degrees of freedom
## Multiple R-squared: 0.04617, Adjusted R-squared: -0.04054
## F-statistic: 0.5325 on 19 and 209 DF, p-value: 0.9458
tsr_mar.5<- tslm(formula = train.mar[,5]~trend+season, data = train.mar)
summary(tsr_mar.5)
##
## Call:
## tslm(formula = train.mar[, 5] ~ trend + season, data = train.mar)
##
## Residuals:
## Min 1Q Median 3Q Max
## -40.636 -12.896 -1.701 10.654 69.180
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 92.54475 5.79665 15.965 <2e-16 ***
## trend -0.01377 0.01935 -0.712 0.478
## season2 0.83811 7.72756 0.108 0.914
## season3 -4.21479 7.72717 -0.545 0.586
## season4 4.50731 7.72683 0.583 0.560
## season5 -1.15392 7.72654 -0.149 0.881
## season6 0.55151 7.72630 0.071 0.943
## season7 -0.42639 7.72611 -0.055 0.956
## season8 2.42904 7.72596 0.314 0.754
## season9 -1.93219 7.72587 -0.250 0.803
## season10 3.05658 7.72582 0.396 0.693
## season11 -1.28799 7.72582 -0.167 0.868
## season12 0.50911 7.72587 0.066 0.948
## season13 -1.26045 7.72596 -0.163 0.871
## season14 1.60331 7.72611 0.208 0.836
## season15 -4.36625 7.72630 -0.565 0.573
## season16 -3.34415 7.72654 -0.433 0.666
## season17 -0.01372 7.72683 -0.002 0.999
## season18 -2.50829 7.72717 -0.325 0.746
## season19 -1.74452 7.72756 -0.226 0.822
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.3 on 209 degrees of freedom
## Multiple R-squared: 0.01802, Adjusted R-squared: -0.07125
## F-statistic: 0.2019 on 19 and 209 DF, p-value: 0.9999
tsr_mar.6<- tslm(formula = train.mar[,6]~trend+season, data = train.mar)
summary(tsr_mar.6)
##
## Call:
## tslm(formula = train.mar[, 6] ~ trend + season, data = train.mar)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.682 -11.005 -1.441 10.746 32.365
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 85.52787 4.21134 20.309 < 2e-16 ***
## trend -0.06586 0.01406 -4.685 5.03e-06 ***
## season2 1.05300 5.61417 0.188 0.851
## season3 -1.08947 5.61389 -0.194 0.846
## season4 1.30139 5.61365 0.232 0.817
## season5 -0.26608 5.61343 -0.047 0.962
## season6 2.50811 5.61326 0.447 0.655
## season7 -0.46769 5.61312 -0.083 0.934
## season8 2.68150 5.61301 0.478 0.633
## season9 1.60570 5.61294 0.286 0.775
## season10 5.05489 5.61291 0.901 0.369
## season11 1.73742 5.61291 0.310 0.757
## season12 2.86161 5.61294 0.510 0.611
## season13 0.56914 5.61301 0.101 0.919
## season14 1.37667 5.61312 0.245 0.806
## season15 0.85920 5.61326 0.153 0.878
## season16 0.40839 5.61343 0.073 0.942
## season17 -0.85908 5.61365 -0.153 0.879
## season18 -0.52655 5.61389 -0.094 0.925
## season19 0.34764 5.61417 0.062 0.951
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.02 on 209 degrees of freedom
## Multiple R-squared: 0.1061, Adjusted R-squared: 0.0248
## F-statistic: 1.305 on 19 and 209 DF, p-value: 0.1823
tsr_terr<- tslm(formula = train.terr[,4]~trend+season, data = train.terr)
summary(tsr_terr)
##
## Call:
## tslm(formula = train.terr[, 4] ~ trend + season, data = train.terr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -95.528 -14.160 2.437 17.632 58.849
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 578.6782 13.5179 42.808 < 2e-16 ***
## trend -2.3827 0.5165 -4.613 5.74e-05 ***
## season2 -30.8407 13.4214 -2.298 0.028044 *
## season3 57.4170 13.4214 4.278 0.000152 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 33.52 on 33 degrees of freedom
## Multiple R-squared: 0.6526, Adjusted R-squared: 0.621
## F-statistic: 20.66 on 3 and 33 DF, p-value: 1.02e-07
tsr_terr.5<- tslm(formula = train.terr[,5]~trend+season, data = train.terr)
summary(tsr_terr.5)
##
## Call:
## tslm(formula = train.terr[, 5] ~ trend + season, data = train.terr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.2407 -9.4529 0.6109 8.8013 25.5798
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 90.1626 5.5612 16.213 <2e-16 ***
## trend -0.2434 0.2125 -1.145 0.2603
## season2 -8.2518 5.5215 -1.494 0.1446
## season3 10.6082 5.5215 1.921 0.0634 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.79 on 33 degrees of freedom
## Multiple R-squared: 0.2717, Adjusted R-squared: 0.2054
## F-statistic: 4.103 on 3 and 33 DF, p-value: 0.01402
tsr_terr.6<-tslm(formula = train.terr[,6]~trend+season, data = train.terr)
Part 8: TSLM forecasting
test_forecast(forecast.obj = fcast.ON.4, actual = ts_ON.2[,4], test = test.ON[,4])
test_forecast(forecast.obj = fcast.ON.5, actual = ts_ON.2[,5], test = test.ON[,5])
test_forecast(forecast.obj = fcast.ON.6, actual = ts_ON.2[,6], test = test.ON[,6])
test_forecast(forecast.obj = fcast.QC.4, actual = ts_QC.2[,4], test = test.QC[,4])
test_forecast(forecast.obj = fcast.QC.5, actual = ts_QC.2[,5], test = test.QC[,5])
test_forecast(forecast.obj = fcast.QC.6, actual = ts_QC.2[,6], test = test.QC[,6])
test_forecast(forecast.obj = fcast.BC.4, actual = ts_BC.2[,4], test = test.BC[,4])
test_forecast(forecast.obj = fcast.BC.5, actual = ts_BC.2[,5], test = test.BC[,5])
test_forecast(forecast.obj = fcast.BC.6, actual = ts_BC.2[,6], test = test.BC[,6])
test_forecast(forecast.obj = fcast.mid.4, actual = ts_midwest.2[,4], test = test.mid[,4])
test_forecast(forecast.obj = fcast.mid.5, actual = ts_midwest.2[,5], test = test.mid[,5])
test_forecast(forecast.obj = fcast.mid.6, actual = ts_midwest.2[,6], test = test.mid[,6])
test_forecast(forecast.obj = fcast.mar.4, actual = ts_maritimes.2[,4], test = test.mar[,4])
test_forecast(forecast.obj = fcast.mar.5, actual = ts_maritimes.2[,5], test = test.mar[,5])
test_forecast(forecast.obj = fcast.mar.6, actual = ts_maritimes.2[,6], test = test.mar[,6])
test_forecast(forecast.obj = fcast.terr.4, actual = ts_territories.2[,4], test = test.terr[,4])
test_forecast(forecast.obj = fcast.terr.5, actual = ts_territories.2[,5], test = test.terr[,5])
test_forecast(forecast.obj = fcast.terr.6, actual = ts_territories.2[,6], test = test.terr[,6])
Part 9: ARIMA modeling and Forecasts
##ARIMA model or SARIMA
##trying non seasonal ARIMA models
##with the currently non differenced data
##staring with auto.arima
####################All cancer sites
#(auto.arima(train.ON[,4], seasonal = TRUE))
#(Arima(train.ON[,4], order = c(1,0,5), seasonal = c(1,0,1)))#model from auto.arima
#AIC=6659.09 AICc=6659.45 BIC=6703.47
#(Arima(train.ON[,4], order = c(1,1,6), seasonal = c(0,1,1)))
#(arima_ON.m<- Arima(train.ON[,4], order = c(1,1,6), seasonal = c(1,1,1)))
#AIC=6127.88 AICc=6128.27 BIC=6171.37
#(arima_ON.m<- Arima(train.ON[,4], order = c(1,1,8), seasonal = c(0,1,1)))
#AIC=6089.67 AICc=6090.14 BIC=6137.51
#arima_ON.m<- Arima(train.ON[,4], order = c(1,1,8), seasonal = c(0,1,2))####BEST AIC
arima_ON.m<- arima(train.ON[,4], order = c(1,1,6), seasonal = list(order=c(0,1,2),period=52))#aic = 5944.63
#AIC=5865.88 AICc=5866.44 BIC=5918.07
#################Lungs
#(arima_ON.5<- auto.arima(train.ON[,5], seasonal = TRUE))
#(arima_ON.5<- Arima(train.ON[,5], order = c(1,0,5), seasonal = c(0,0,1)))#model from auto.arima
#AIC=5183.45 AICc=5183.75 BIC=5223.39
#(arima_ON.5.m<- Arima(train.ON[,5], order = c(1,0,5), seasonal = c(0,1,1)))
# AIC=4791.34 AICc=4791.6 BIC=4826.15
#(arima_ON.5.m<- Arima(train.ON[,5], order = c(1,0,7), seasonal = c(0,1,1)))
#AIC=4742.87 AICc=4743.26 BIC=4786.38
#(arima_ON.5.m<- Arima(train.ON[,5], order = c(1,0,8), seasonal = c(0,1,2)))
#AIC=4506.99 AICc=4507.55 BIC=4559.2
#arima_ON.5.m<- Arima(train.ON[,5], order = c(1,0,9), seasonal = c(0,1,2))##BEST AIC
#AIC=4497.12 AICc=4497.77 BIC=4553.68
arima_ON.5.m<- arima(train.ON[,5], order = c(1,0,8), seasonal = list(order=c(0,1,2),period=52))
#aic = 4506.99
#############################COLON
#(arima_ON.6<- auto.arima(train.ON[,6], seasonal = FALSE)) #False higher AIC than True
#(arima_ON.6.m<- Arima(train.ON[,6], order = c(1,0,5), seasonal = c(0,0,0)))#model from auto.arima
#AIC=4753.36 AICc=4753.59 BIC=4788.86
#(arima_ON.6.m<- Arima(train.ON[,6], order = c(1,0,6), seasonal = c(0,0,0)))
#AIC=4689.1 AICc=4689.39 BIC=4729.04
#(arima_ON.6.m<- Arima(train.ON[,6], order = c(1,0,5), seasonal = c(0,1,1)))
#AIC=4437.09 AICc=4437.35 BIC=4471.9
#(arima_ON.6.m<- Arima(train.ON[,6], order = c(1,0,6), seasonal = c(0,1,1)))
#AIC=4352.96 AICc=4353.28 BIC=4392.12
#(arima_ON.6.m<- Arima(train.ON[,6], order = c(1,0,7), seasonal = c(0,1,1)))
#AIC=4335.71 AICc=4336.1 BIC=4379.22
#arima_ON.6.m<- Arima(train.ON[,6], order = c(1,0,8), seasonal = list(order=c(0,1,2), period=52))##BEST AIC
#AIC=4117.67 AICc=4118.23 BIC=4169.88
arima_ON.6.m<- arima(train.ON[,6], order = c(1,0,6), seasonal = list(order=c(0,1,2), period=52))
#aic = 4150.05
test_forecast(forecast.obj = fcast_ON.4, actual = ts_ON.2[,4], test = test.ON[,4])
test_forecast(forecast.obj = fcast_ON.5, actual = ts_ON.2[,5], test = test.ON[,5])
test_forecast(forecast.obj = fcast_ON.6, actual = ts_ON.2[,6], test = test.ON[,6])
#######################All cancer sites
#(arima_QC<- auto.arima(train.QC[,4], seasonal = TRUE))
#(arima_QC<- Arima(train.QC[,4], order = c(1,0,1), seasonal = c(0,0,1)))##auto.arima model
#AIC=2813.12 AICc=2813.38 BIC=2830.28
#(arima_QC.m<- Arima(train.QC[,4], order = c(1,0,3), seasonal = c(0,0,1)))
#AIC=2799.16 AICc=2799.66 BIC=2823.19
#(arima_QC.m<- Arima(train.QC[,4], order = c(1,0,3), seasonal = c(0,1,1)))
#AIC=2570.48 AICc=2570.89 BIC=2590.56
#(arima_QC.m<- Arima(train.QC[,4], order = c(1,1,4), seasonal = c(0,1,1)))
#AIC=2568.64 AICc=2569.2 BIC=2592.04
#arima_QC.m<- Arima(train.QC[,4], order = c(1,1,4), seasonal = c(0,1,2))##BEST AIC
#AIC=2526.05 AICc=2526.77 BIC=2552.79
arima_QC.m<- arima(train.QC[,4], order = c(1,1,4), seasonal = list(order =c(0,1,2),period=19))
##########################LUNGS
#(arima_QC.5<- auto.arima(train.QC[,5], seasonal = TRUE))
#(arima_QC.5<-Arima(train.QC[,5], order = c(0,0,1), seasonal = c(0,0,2)))##auto.arima model
#AIC=2407.76 AICc=2408.02 BIC=2424.92
#(arima_QC.5.m<-Arima(train.QC[,5], order = c(1,0,4), seasonal = c(0,1,2)))
#AIC=2190.36 AICc=2191.08 BIC=2217.14
#(arima_QC.5.m<-Arima(train.QC[,5], order = c(1,1,4), seasonal = c(0,1,2)))
#AIC=2188.35 AICc=2189.07 BIC=2215.09
#(arima_QC.5.m<-Arima(train.QC[,5], order = c(1,1,4), seasonal = c(0,1,3)))
#AIC=2178.95 AICc=2179.85 BIC=2209.03
#(arima_QC.5.m<-Arima(train.QC[,5], order = c(1,1,4), seasonal = c(0,1,4)))
#AIC=2125.93 AICc=2127.04 BIC=2159.36
#arima_QC.5.m<-Arima(train.QC[,5], order = c(1,1,4), seasonal = c(0,1,5))
#AIC=2104.38 AICc=2105.72 BIC=2141.14
arima_QC.5.m<- arima(train.QC[,5], order = c(1,1,4), seasonal = list(order =c(0,1,5),period=19))
#aic = 2104.38
############################# Colon
#(arima_QC.6<- auto.arima(train.QC[,6], seasonal = TRUE))
#(arima_QC.6.m<-Arima(train.QC[,6], order = c(0,0,3), seasonal = c(0,0,1)))##auto.arima model
#AIC=2258.1 AICc=2258.48 BIC=2278.7
#(arima_QC.6.m<-Arima(train.QC[,6], order = c(4,0,3), seasonal = c(0,0,1)))
#AIC=2250.51 AICc=2251.52 BIC=2284.85
#(arima_QC.6.m<-Arima(train.QC[,6], order = c(1,0,3), seasonal = c(0,1,1)))
#AIC=2114.69 AICc=2115.11 BIC=2134.78
#(arima_QC.6.m<-Arima(train.QC[,6], order = c(1,0,3), seasonal = c(0,2,1)))
#AIC=2110.41 AICc=2110.86 BIC=2129.92
#(arima_QC.6.m<-Arima(train.QC[,6], order = c(1,0,3), seasonal = c(0,2,4)))
#AIC=2005.74 AICc=2006.74 BIC=2035.01
#arima_QC.6.m<-Arima(train.QC[,6], order = c(1,0,3), seasonal = c(0,2,5))##BEST AIC
#AIC=1980.22 AICc=1981.44 BIC=2012.74
arima_QC.6.m<- arima(train.QC[,6], order = c(1,0,3), seasonal = list(order =c(0,2,5),period=19))
#aic = 1980.22
test_forecast(forecast.obj = fcast_QC.4, actual = ts_QC.2[,4], test = test.QC[,4])
test_forecast(forecast.obj = fcast_QC.5, actual = ts_QC.2[,5], test = test.QC[,5])
test_forecast(forecast.obj = fcast_QC.6, actual = ts_QC.2[,6], test = test.QC[,6])
##############################ALL cancer sites
#(arima_BC<-auto.arima(train.BC[,4], seasonal = TRUE))
#(arima_BC<- Arima(train.BC[,4], order = c(1,0,5), seasonal = c(0,0,1)))#auto.arima model
#AIC=2187.68 AICc=2188.6 BIC=2217.58
#(arima_BC.m<- Arima(train.BC[,4], order = c(1,0,5), seasonal = c(0,1,1)))
#AIC=2041.8 AICc=2042.6 BIC=2067.69
#(arima_BC.m<- Arima(train.BC[,4], order = c(1,0,6), seasonal = c(0,1,1)))
#AIC=2020.01 AICc=2021.02 BIC=2049.14
#(arima_BC.m<- Arima(train.BC[,4], order = c(1,0,9), seasonal = c(0,1,2)))
#AIC=1917.45 AICc=1919.54 BIC=1959.52
#(arima_BC.m<- Arima(train.BC[,4], order = c(1,0,9), seasonal = c(0,1,3)))
#AIC=1906.1 AICc=1908.53 BIC=1951.41
#(arima_BC.m<- Arima(train.BC[,4], order = c(1,0,9), seasonal = c(0,1,4)))
#AIC=1847.06 AICc=1849.85 BIC=1895.61
#(arima_BC.m<- Arima(train.BC[,4], order = c(1,0,9), seasonal = c(0,1,5)))
#AIC=1823.34 AICc=1826.52 BIC=1875.12
#(arima_BC.m<- Arima(train.BC[,4], order = c(1,0,9), seasonal = c(0,1,6)))
#AIC=1815.42 AICc=1819.02 BIC=1870.44
#(arima_BC.m<- Arima(train.BC[,4], order = c(1,0,9), seasonal = c(0,1,7)))
#AIC=1784.21 AICc=1788.26 BIC=1842.46
#arima_BC.m<- Arima(train.BC[,4], order = c(1,0,9), seasonal = c(0,1,8))
#AIC=1779.17 AICc=1783.69 BIC=1840.66
##takes forever to run
arima_BC.m<- arima(train.BC[,4], order = c(1,0,8), seasonal = list(order=c(0,1,6),period=17))
#aic = 1831.82
#############################################LUNGS
(arima_BC.<-auto.arima(train.BC[,5], seasonal = FALSE))##better than seasonal=TRUE
## Series: train.BC[, 5]
## ARIMA(4,0,2) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 mean
## -0.2212 -0.5840 0.3718 -0.3439 0.6443 0.9447 70.1662
## s.e. 0.0686 0.0633 0.0632 0.0679 0.0321 0.0347 0.9781
##
## sigma^2 = 95.71: log likelihood = -757.62
## AIC=1531.24 AICc=1531.98 BIC=1557.82
#AIC=1531.24 AICc=1531.98 BIC=1557.82
#(arima_BC.5.m<- Arima(train.BC[,5], order = c(2,0,4), seasonal = c(0,1,1)))
#AIC=1358.72 AICc=1359.53 BIC=1384.62
#(arima_BC.5.m<- Arima(train.BC[,5], order = c(2,0,4), seasonal = c(1,1,1)))
#AIC=1334.85 AICc=1335.86 BIC=1363.98
#(arima_BC.5.m<- Arima(train.BC[,5], order = c(2,0,4), seasonal = c(1,1,4)))
#AIC=1242.85 AICc=1244.64 BIC=1281.69
#arima_BC.5.m<- Arima(train.BC[,5], order = c(2,0,4), seasonal = c(2,1,4))###BEST AIC
#AIC=1148.16 AICc=1150.25 BIC=1190.23
arima_BC.5.m<- arima(train.BC[,5], order = c(2,0,4), seasonal = list(order=c(2,1,4),period=17))
#aic = 1148.16
################################# Colon
#(arima_BC.6<-auto.arima(train.BC[,6], seasonal = TRUE))
#(arima_BC.6.m<- Arima(train.BC[,6], order = c(1,0,5), seasonal = c(0,0,2)))#auto.arima model
#AIC=1444.7 AICc=1445.83 BIC=1477.93
#(arima_BC.6.m<- Arima(train.BC[,6], order = c(1,0,5), seasonal = c(0,1,2)))
#AIC=1361.42 AICc=1362.43 BIC=1390.55
#(arima_BC.6.m<- Arima(train.BC[,6], order = c(1,0,5), seasonal = c(0,1,4)))
#AIC=1302.51 AICc=1304.01 BIC=1338.11
arima_BC.6.m<- arima(train.BC[,6], order = c(1,0,6), seasonal = list(order=c(0,1,4),perid=17))##BEST AIC
#aic = 1285
test_forecast(forecast.obj = fcast_BC.4, actual = ts_BC.2[,4], test = test.BC[,4])
test_forecast(forecast.obj = fcast_BC.5, actual = ts_BC.2[,5], test = test.BC[,5])
test_forecast(forecast.obj = fcast_BC.6, actual = ts_BC.2[,6], test = test.BC[,6])
##################################ALL cancer sites
#(arima_mid<- auto.arima(train.mid[,4], seasonal= TRUE))
#(arima_mid.m<- Arima(train.mid[,4], order = c(2,0,2), seasonal = c(1,1,0)))#auto.arima model
#AIC=2581.3 AICc=2581.62 BIC=2602.77
#(arima_mid.m<- Arima(train.mid[,4], order = c(2,0,3), seasonal = c(1,1,0)))
#AIC=2577.45 AICc=2577.88 BIC=2602.51
#(arima_mid.m<- Arima(train.mid[,4], order = c(2,0,3), seasonal = c(1,1,1)))
#AIC=2555.31 AICc=2555.87 BIC=2583.95
#(arima_mid.m<- Arima(train.mid[,4], order = c(2,0,3), seasonal = c(1,2,1)))
#AIC=2442.21 AICc=2442.83 BIC=2470.09
#(arima_mid.m<- Arima(train.mid[,4], order = c(2,0,3), seasonal = c(1,2,3)))
#AIC=2419.11 AICc=2420.07 BIC=2453.96
#(arima_mid.m<- Arima(train.mid[,4], order = c(2,0,3), seasonal = c(1,2,5)))
#AIC=2412.21 AICc=2413.58 BIC=2454.02
#arima_mid.m<- Arima(train.mid[,4], order = c(2,0,3), seasonal = c(1,2,6))##no noticeable improvement from 6->8
#AIC=2405.48 AICc=2407.08 BIC=2450.78
arima_mid.m<- arima(train.mid[,4], order = c(2,0,3), seasonal = list(order=c(1,2,6),period=24))
#aic = 2405.48
###########################################LUNGS
#(arima_mid.5<- auto.arima(train.mid[,5], seasonal= TRUE))
#(arima_mid.5.m<- Arima(train.mid[,5], order = c(5,1,2), seasonal = c(2,0,1)))##auto.arima model
#AIC=2239.1 AICc=2240.24 BIC=2283.06
#(arima_mid.5.m<- Arima(train.mid[,5], order = c(5,1,3), seasonal = c(2,0,1)))
#AIC=2169.99 AICc=2171.12 BIC=2213.94
#(arima_mid.5.m<- Arima(train.mid[,5], order = c(5,1,3), seasonal = c(2,1,1)))
#AIC=2022.71 AICc=2023.95 BIC=2065.62
#(arima_mid.5.m<- Arima(train.mid[,5], order = c(5,1,3), seasonal = c(2,1,2)))
#AIC=2020.75 AICc=2022.2 BIC=2067.23
#(arima_mid.5.m<- Arima(train.mid[,5], order = c(5,1,3), seasonal = c(2,2,2)))
#AIC=2000.26 AICc=2001.87 BIC=2045.51
#(arima_mid.5.m<- Arima(train.mid[,5], order = c(5,2,3), seasonal = c(2,2,2)))
#AIC=1991.14 AICc=1992.75 BIC=2036.33 ##also solved warning in previous version
#arima_mid.5.m<- Arima(train.mid[,5], order = c(5,2,4), seasonal = c(2,2,2))##BEST AIC
#AIC=1971.84 AICc=1973.72 BIC=2020.52
arima_mid.5.m<- arima(train.mid[,5], order = c(5,2,4), seasonal = list(order=c(2,2,2),period=24))
########################################## Colon
#(arima_mid.6<- auto.arima(train.mid[,6], seasonal= TRUE))
#(arima_mid.5.m<- Arima(train.mid[,6], order = c(2,0,0), seasonal = c(2,1,1))) ##auto.arima model
#AIC=1904.11 AICc=1904.55 BIC=1929.17
#(arima_mid.5.m<- Arima(train.mid[,6], order = c(2,0,2), seasonal = c(2,1,1)))
#AIC=1898.68 AICc=1899.24 BIC=1927.32
#(arima_mid.5.m<- Arima(train.mid[,6], order = c(2,1,2), seasonal = c(2,1,1)))
#AIC=1891.71 AICc=1892.28 BIC=1920.32
#(arima_mid.5.m<- Arima(train.mid[,6], order = c(2,1,2), seasonal = c(2,1,2)))
#AIC=1887.73 AICc=1888.44 BIC=1919.92
#(arima_mid.5.m<- Arima(train.mid[,6], order = c(2,1,2), seasonal = c(2,2,2)))
#AIC=1851.87 AICc=1852.65 BIC=1883.19
#arima_mid.6.m<- Arima(train.mid[,6], order = c(2,1,2), seasonal = c(2,3,2)) ##BEST AIC
#AIC=1840.36 AICc=1841.23 BIC=1870.74
arima_mid.6.m<- arima(train.mid[,6], order = c(2,1,2), seasonal = list(order=c(2,3,2),period=24))
test_forecast(forecast.obj = fcast_mid.4, actual = ts_midwest.2[,4], test = test.mid[,4])
test_forecast(forecast.obj = fcast_mid.5, actual = ts_midwest.2[,5], test = test.mid[,5])
test_forecast(forecast.obj = fcast_mid.6, actual = ts_midwest.2[,6], test = test.mid[,6])
##forecasts estimate higher incidence than the actual values
#(arima_mar<- auto.arima(train.mar[,4], seasonal = TRUE))
#(arima_mar.m <- Arima(train.mar[,4], order = c(4,1,0) ,seasonal = c(0,0,2))) #auto.arima model
#AIC=2201.95 AICc=2202.61 BIC=2229.38
#(arima_mar.m <- Arima(train.mar[,4], order = c(4,1,1) ,seasonal = c(0,0,2)))
#AIC=2198.94 AICc=2199.6 BIC=2226.37
#(arima_mar.m <- Arima(train.mar[,4], order = c(4,1,3) ,seasonal = c(0,0,2)))
#AIC=2131.73 AICc=2132.74 BIC=2166.02
#(arima_mar.m <- Arima(train.mar[,4], order = c(4,1,3) ,seasonal = c(0,1,2)))
#AIC=2002.72 AICc=2003.83 BIC=2036.14
#(arima_mar.m <- Arima(train.mar[,4], order = c(4,1,3) ,seasonal = c(0,2,2)))
#AIC=1916.7 AICc=1917.93 BIC=1949.17
#arima_mar.m <- Arima(train.mar[,4], order = c(4,1,3) ,seasonal = c(0,2,3))## BEST AIC
#AIC=1915.16 AICc=1916.65 BIC=1950.88
arima_mar.m <- arima(train.mar[,4], order = c(4,1,3) ,seasonal = list(order=c(0,2,3),period=19))
######################################LUNGS
#(arima_mar.5<- auto.arima(train.mar[,5], seasonal = TRUE))
#(arima_mar.5.m<- Arima(train.mar[,5], order = c(5,0,1), seasonal = c(0,0,2)))##auto.arima model
#AIC=1663.09 AICc=1664.1 BIC=1697.42
#(arima_mar.5.m<- Arima(train.mar[,5], order = c(5,0,1), seasonal = c(0,1,2)))
#AIC=1581.21 AICc=1582.11 BIC=1611.33
#(arima_mar.5.m<- Arima(train.mar[,5], order = c(5,0,2), seasonal = c(0,1,2)))
#AIC=1558.89 AICc=1559.99 BIC=1592.36
#(arima_mar.5.m<- Arima(train.mar[,5], order = c(5,0,2), seasonal = c(0,2,2)))
#AIC=1516.86 AICc=1518.08 BIC=1549.38
#(arima_mar.5.m<- Arima(train.mar[,5], order = c(5,0,2), seasonal = c(0,3,2)))
#AIC=1474.06 AICc=1475.43 BIC=1505.54
#arima_mar.5.m<- Arima(train.mar[,5], order = c(5,0,3), seasonal = c(0,3,2))##BEST AIC
#AIC=1472.04 AICc=1473.69 BIC=1506.66
arima_mar.5.m<- arima(train.mar[,5], order = c(5,0,3), seasonal = list(order=c(0,3,2),period=19))
############################### Colon
#(arima_mar.6<- auto.arima(train.mar[,6], seasonal = TRUE))
#(arima_mar.6.m<- Arima(train.mar[,6], order = c(0,1,5), seasonal = c(0,0,1))) #auto.arima model
#AIC=1668.67 AICc=1669.33 BIC=1696.1
#(arima_mar.6.m<- Arima(train.mar[,6], order = c(0,1,5), seasonal = c(0,1,1)))
#AIC=1596.2 AICc=1596.75 BIC=1619.59
#(arima_mar.6.m<- Arima(train.mar[,6], order = c(0,1,6), seasonal = c(0,1,1)))
#AIC=1566.62 AICc=1567.34 BIC=1593.36
#(arima_mar.6.m<- Arima(train.mar[,6], order = c(0,1,10), seasonal = c(0,1,4)))
#AIC=1497.4 AICc=1499.89 BIC=1547.53
#(arima_mar.6.m<- Arima(train.mar[,6], order = c(0,1,10), seasonal = c(0,1,5)))
#AIC=1491.06 AICc=1493.89 BIC=1544.54
#(arima_mar.6.m<- Arima(train.mar[,6], order = c(0,1,10), seasonal = c(0,2,5)))
#AIC=1471.18 AICc=1474.32 BIC=1523.13
#(arima_mar.6.m<- Arima(train.mar[,6], order = c(0,1,10), seasonal = c(0,3,5)))
#AIC=1469.12 AICc=1472.65 BIC=1519.38
#arima_mar.6.m<- Arima(train.mar[,6], order = c(0,1,10), seasonal = c(0,3,6))##BEST AIC
#AIC=1462.55 AICc=1466.55 BIC=1515.95
arima_mar.6.m<- arima(train.mar[,6], order = c(1,1,8), seasonal = list(order= c(0,3,6),period=19))
#aic = 1477.06
test_forecast(forecast.obj = fcast_mar.4, actual = ts_maritimes.2[,4], test = test.mar[,4])
test_forecast(forecast.obj = fcast_mar.5, actual = ts_maritimes.2[,5], test = test.mar[,5])
test_forecast(forecast.obj = fcast_mar.6, actual = ts_maritimes.2[,6], test = test.mar[,6])
#####################ALL cancer sites
#(arima_terr<-auto.arima(train.terr[,4], seasonal = TRUE, stepwise = FALSE, approximation = FALSE))
arima_terr.m<- arima(train.terr[,4], order = c(0,0,0), seasonal = list(order=c(0,1,0),period=3))##auto.arima model
#AIC=347.42 AICc=347.55 BIC=348.95
##BEST AIC
############################### LUNGS
#(arima_terr.5<-auto.arima(train.terr[,5], seasonal = TRUE))
arima_terr.5.m<- arima(train.terr[,5], order= c(0,0,0), seasonal = list(order=c(2,0,0),period=3))##auto.arima model
#AIC=283.85 AICc=285.1 BIC=290.29
##BEST AIC
################################# COlon
#(arima_terr.6<-auto.arima(train.terr[,6], seasonal = TRUE))
#> ARIMA(2,1,0)
#> AIC=346.54 AICc=347.29 BIC=351.29
#(arima_terr.6.m<- Arima(train.terr[,6], order= c(2,1,1), seasonal = c(0,0,0)))
#AIC=348.5 AICc=289.19 BIC=294.24
arima_terr.6.m<- arima(train.terr[,6], order= c(2,1,1), seasonal = list(order=c(0,1,0),period=3))
#AIC=322.89 AICc=267.86 BIC=272.42
test_forecast(forecast.obj = fcast_terr.4, actual = ts_territories.2[,4], test = test.terr[,4])
test_forecast(forecast.obj = fcast_terr.5, actual = ts_territories.2[,5], test = test.terr[,5])
test_forecast(forecast.obj = fcast_terr.6, actual = ts_territories.2[,6], test = test.terr[,6])
check_res(arima_ON.m)
check_res(arima_ON.5.m)
check_res(arima_ON.6.m)
check_res(arima_QC.m)
check_res(arima_QC.5.m)
check_res(arima_QC.6.m)
check_res(arima_BC.m)
check_res(arima_BC.5.m)
check_res(arima_BC.6.m)
check_res(arima_mid.m)
check_res(arima_mid.5.m)
check_res(arima_mid.6.m)
check_res(arima_mar.m)
check_res(arima_mar.5.m)
check_res(arima_mar.6.m)
check_res(arima_terr.m)
check_res(arima_terr.5.m)
check_res(arima_terr.6.m)
Part 10: Effectiveness - Accuracy Measures
#####################ARIMA
print("Ontario ARIMA models")
## [1] "Ontario ARIMA models"
accuracy(fcast_ON.4, test.ON[,4])
## ME RMSE MAE MPE MAPE MASE
## Training set -0.7342033 32.45072 25.10075 -0.70603094 4.891489 0.2526362
## Test set 6.3203872 53.99158 45.75886 0.03411064 8.663069 0.4605577
## ACF1 Theil's U
## Training set -0.09777807 NA
## Test set -0.37092812 0.4930353
accuracy(fcast_ON.5, test.ON[,5])
## ME RMSE MAE MPE MAPE MASE
## Training set 0.2171178 9.235597 6.750731 -1.725505 9.915781 0.3098923
## Test set -0.7523922 10.092238 7.897227 -3.375201 11.847799 0.3625222
## ACF1 Theil's U
## Training set -0.008318187 NA
## Test set 0.065808248 0.5861256
accuracy(fcast_ON.6, test.ON[,6])
## ME RMSE MAE MPE MAPE MASE
## Training set 0.1858216 6.819994 5.167771 -0.9890724 7.843747 0.2872683
## Test set -1.4478105 9.954216 7.912281 -4.4579768 12.520854 0.4398313
## ACF1 Theil's U
## Training set -0.02687924 NA
## Test set -0.36120760 0.477058
################TSLM
print("Ontario TSLM models")
## [1] "Ontario TSLM models"
accuracy(fcast.ON.4, test.ON[,4])
## ME RMSE MAE MPE MAPE MASE
## Training set -3.544617e-15 67.44486 56.59353 -1.650801 10.92957 0.5696074
## Test set -2.089438e+00 67.98010 57.51472 -2.036162 11.02530 0.5788791
## ACF1 Theil's U
## Training set -0.2916615 NA
## Test set -0.2922284 0.628614
accuracy(fcast.ON.5, test.ON[,5])
## ME RMSE MAE MPE MAPE MASE
## Training set 4.089062e-16 16.01967 12.62884 -5.296708 19.01862 0.5797269
## Test set -1.224907e+00 13.31527 10.58071 -5.123035 16.09178 0.4857075
## ACF1 Theil's U
## Training set 0.006094595 NA
## Test set 0.121342684 0.8063011
accuracy(fcast.ON.6, test.ON[,6])
## ME RMSE MAE MPE MAPE MASE
## Training set 1.588854e-16 12.67448 10.24572 -3.410354 15.48518 0.5695433
## Test set -1.890459e+00 12.62819 10.45913 -6.331039 16.62284 0.5814069
## ACF1 Theil's U
## Training set -0.187263 NA
## Test set -0.308252 0.6236434
#####################ARIMA
print("Quebec ARIMA models")
## [1] "Quebec ARIMA models"
accuracy(fcast_QC.4, test.QC[,4])
## ME RMSE MAE MPE MAPE MASE
## Training set -3.402105 81.03498 58.78771 -2.11893804 9.923888 0.4261876
## Test set 5.704239 69.29902 56.19228 -0.05398666 9.875326 0.4073718
## ACF1 Theil's U
## Training set -0.01218758 NA
## Test set -0.16173147 0.5462605
accuracy(fcast_QC.5, test.QC[,5])
## ME RMSE MAE MPE MAPE MASE
## Training set -0.7977578 24.50994 16.87430 -Inf Inf 0.2848762
## Test set -1.0018107 21.84940 16.00479 -3.434945 15.49545 0.2701968
## ACF1 Theil's U
## Training set -0.004871119 NA
## Test set 0.140461623 0.428896
accuracy(fcast_QC.6, test.QC[,6])
## ME RMSE MAE MPE MAPE MASE
## Training set -1.380253 25.09385 13.542098 -4.913560 15.87571 0.3871488
## Test set -2.787314 11.30498 8.680159 -5.462049 12.75794 0.2481531
## ACF1 Theil's U
## Training set -0.002606181 NA
## Test set -0.103577339 0.5255439
################TSLM
print("Quebec TSLM models")
## [1] "Quebec TSLM models"
accuracy(fcast.QC.4, test.QC[,4])
## ME RMSE MAE MPE MAPE MASE
## Training set -2.978838e-15 100.55372 76.58136 -2.482115 12.97538 0.5551845
## Test set -7.617443e+00 78.51699 63.78263 -2.726196 11.39109 0.4623988
## ACF1 Theil's U
## Training set 0.03674577 NA
## Test set -0.16572215 0.616928
accuracy(fcast.QC.5, test.QC[,5])
## ME RMSE MAE MPE MAPE MASE
## Training set 6.478510e-17 47.05953 33.27729 -Inf Inf 0.5617955
## Test set -6.473464e+00 35.38083 27.75142 -12.19978 28.24806 0.4685064
## ACF1 Theil's U
## Training set 0.26967228 NA
## Test set 0.07461657 0.7020082
accuracy(fcast.QC.6, test.QC[,6])
## ME RMSE MAE MPE MAPE MASE
## Training set 8.682651e-16 37.42596 20.82961 -9.129001 23.57742 0.5954883
## Test set -7.012978e+00 21.82977 16.59623 -13.103774 24.36062 0.4744620
## ACF1 Theil's U
## Training set 0.2870133 NA
## Test set 0.2124646 0.9684529
#####################ARIMA
print("BC ARIMA models")
## [1] "BC ARIMA models"
accuracy(fcast_BC.4, test.BC[,4])
## ME RMSE MAE MPE MAPE MASE
## Training set -1.361911 18.03714 13.28185 -0.5247616 2.780829 0.1419872
## Test set 15.131115 38.39510 29.96387 2.2237066 5.722222 0.3203234
## ACF1 Theil's U
## Training set 0.006225926 NA
## Test set -0.224283986 0.3725037
accuracy(fcast_BC.5, test.BC[,5])
## ME RMSE MAE MPE MAPE MASE
## Training set -0.2626233 3.532935 2.614178 -0.6773540 3.806916 0.1508118
## Test set 0.5055518 5.768628 3.911727 -0.1145212 5.622419 0.2256673
## ACF1 Theil's U
## Training set -0.03900797 NA
## Test set 0.35746108 0.3048858
accuracy(fcast_BC.6, test.BC[,6])
## ME RMSE MAE MPE MAPE MASE
## Training set 0.49799296 5.060612 3.858002 -0.2063229 6.245824 0.2406451
## Test set 0.04078948 7.955122 6.564351 -1.7535620 10.635308 0.4094552
## ACF1 Theil's U
## Training set -0.01352428 NA
## Test set -0.07725039 0.4543273
################TSLM
print("BC TSLM models")
## [1] "BC TSLM models"
accuracy(fcast.BC.4, test.BC[,4])
## ME RMSE MAE MPE MAPE MASE
## Training set 2.216742e-15 63.18969 53.03207 -1.635163 10.85632 0.5669299
## Test set 2.219328e+01 68.35999 54.26209 2.855463 10.38967 0.5800792
## ACF1 Theil's U
## Training set -0.2793209 NA
## Test set -0.2879627 0.6722001
accuracy(fcast.BC.5, test.BC[,5])
## ME RMSE MAE MPE MAPE MASE
## Training set -3.462677e-17 12.19117 9.984786 -3.129079 14.88163 0.5760218
## Test set 3.083233e+00 13.23183 10.216855 1.274293 14.25824 0.5894098
## ACF1 Theil's U
## Training set 0.008420675 NA
## Test set 0.337046593 0.7132829
accuracy(fcast.BC.6, test.BC[,6])
## ME RMSE MAE MPE MAPE MASE
## Training set 1.385071e-16 10.94196 9.155316 -3.194198 15.25255 0.5710682
## Test set 3.837316e-01 10.72559 9.187866 -2.197730 15.01381 0.5730985
## ACF1 Theil's U
## Training set -0.2001443 NA
## Test set -0.2110036 0.612524
#####################ARIMA
print("Midwest ARIMA models")
## [1] "Midwest ARIMA models"
accuracy(fcast_mid.4, test.mid[,4])
## ME RMSE MAE MPE MAPE MASE
## Training set 2.846237 24.62107 17.02520 0.3594439 3.180817 0.5353263
## Test set -13.350272 24.62355 21.39882 -2.4703575 4.015196 0.6728470
## ACF1 Theil's U
## Training set -0.008567216 NA
## Test set 0.261144498 0.2343431
accuracy(fcast_mid.5, test.mid[,5])
## ME RMSE MAE MPE MAPE MASE
## Training set 0.04218503 9.173924 6.403709 -0.5009628 8.460447 0.4780637
## Test set -6.69998552 9.846109 7.976579 -9.9995206 11.864569 0.5954851
## ACF1 Theil's U
## Training set -0.02733384 NA
## Test set 0.19375542 0.7186801
accuracy(fcast_mid.6, test.mid[,6])
## ME RMSE MAE MPE MAPE MASE
## Training set -0.2642139 9.413829 6.084825 -0.6310913 8.881973 0.6569747
## Test set 1.8358503 7.256528 5.450974 2.8121503 8.141033 0.5885382
## ACF1 Theil's U
## Training set 0.004247999 NA
## Test set -0.351646059 0.425444
################TSLM
print("Midwest TSLM models")
## [1] "Midwest TSLM models"
accuracy(fcast.mid.4, test.mid[,4])
## ME RMSE MAE MPE MAPE MASE
## Training set -3.148720e-15 28.18532 21.47508 -0.2770566 4.035879 0.6752449
## Test set -6.186426e-01 17.76653 14.92947 -0.3653245 2.907564 0.4694300
## ACF1 Theil's U
## Training set 0.3992370 NA
## Test set 0.3140968 0.1658009
accuracy(fcast.mid.5, test.mid[,5])
## ME RMSE MAE MPE MAPE MASE
## Training set -9.89025e-17 12.672916 8.035323 -2.151047 10.050690 0.5998706
## Test set 2.15347e+00 6.528972 5.472111 2.648482 7.955706 0.4085160
## ACF1 Theil's U
## Training set 0.4113309 NA
## Test set 0.4080632 0.4293385
accuracy(fcast.mid.6, test.mid[,6])
## ME RMSE MAE MPE MAPE MASE
## Training set -1.981194e-16 8.451017 6.096435 -1.317389 8.470573 0.6582282
## Test set -2.438379e+00 4.659719 3.558358 -3.995623 5.784766 0.3841936
## ACF1 Theil's U
## Training set 0.36855627 NA
## Test set 0.08218148 0.2675836
#####################ARIMA
print("Maritimes ARIMA models")
## [1] "Maritimes ARIMA models"
accuracy(fcast_mar.4, test.mar[,4])
## ME RMSE MAE MPE MAPE MASE
## Training set 0.3748883 26.79171 19.22015 -0.03847801 3.467458 0.1669458
## Test set -1.7818321 45.60905 38.05424 -1.11150570 7.069351 0.3305383
## ACF1 Theil's U
## Training set -0.00384057 NA
## Test set -0.06842116 0.2647759
accuracy(fcast_mar.5, test.mar[,5])
## ME RMSE MAE MPE MAPE MASE
## Training set 0.6745681 10.36877 7.255549 0.3295996 8.282918 0.2955188
## Test set -6.4261880 32.74793 26.085511 -16.0184045 32.872583 1.0624638
## ACF1 Theil's U
## Training set 0.006177642 NA
## Test set -0.384584439 0.5029747
accuracy(fcast_mar.6, test.mar[,6])
## ME RMSE MAE MPE MAPE MASE
## Training set 0.2048485 9.438797 6.581651 -0.560942 8.84405 0.3244095
## Test set -2.7075505 14.791491 11.519138 -7.199916 18.14988 0.5677782
## ACF1 Theil's U
## Training set 0.02110998 NA
## Test set -0.18235521 0.6367498
################TSLM
print("Maritimes TSLM models")
## [1] "Maritimes TSLM models"
accuracy(fcast.mar.4, test.mar[,4])
## ME RMSE MAE MPE MAPE MASE
## Training set -7.199985e-15 76.71853 63.82659 -1.792592 11.27545 0.5543963
## Test set 5.181360e+00 95.28617 80.87036 -1.992469 14.96103 0.7024381
## ACF1 Theil's U
## Training set -0.4244026 NA
## Test set -0.3925648 0.6155316
accuracy(fcast.mar.5, test.mar[,5])
## ME RMSE MAE MPE MAPE MASE
## Training set -4.956447e-16 18.43706 14.41968 -4.159001 16.59026 0.587314
## Test set 5.095194e+00 31.80227 25.73884 -6.798168 30.52044 1.048344
## ACF1 Theil's U
## Training set -0.3786891 NA
## Test set -0.4429198 0.6071385
accuracy(fcast.mar.6, test.mar[,6])
## ME RMSE MAE MPE MAPE MASE
## Training set -6.828106e-16 13.39477 11.189707 -2.878422 14.53431 0.5515405
## Test set -8.090630e-01 12.18777 9.636779 -4.731806 15.24833 0.4749968
## ACF1 Theil's U
## Training set -0.3605622 NA
## Test set -0.4031272 0.551602
#####################ARIMA
print("Territories ARIMA models")
## [1] "Territories ARIMA models"
accuracy(fcast_terr.4, test.terr[,4])
## ME RMSE MAE MPE MAPE MASE
## Training set -6.120165 37.28218 28.90686 -1.3954971 5.494962 0.9204283
## Test set 3.516667 38.77106 35.61667 0.0400974 6.799237 1.1340763
## ACF1 Theil's U
## Training set 0.09744538 NA
## Test set -0.23667890 0.3825451
accuracy(fcast_terr.5, test.terr[,5])
## ME RMSE MAE MPE MAPE MASE
## Training set 0.4571086 9.578816 7.40855 -0.7693159 8.82341 0.7928571
## Test set -5.4459145 17.358887 12.83183 -12.0646412 19.72843 1.3732517
## ACF1 Theil's U
## Training set -0.003140624 NA
## Test set -0.486631480 0.5206454
accuracy(fcast_terr.6, test.terr[,6])
## ME RMSE MAE MPE MAPE MASE
## Training set -0.9890756 25.717435 17.98194 -1.559293 15.993746 0.9174458
## Test set -5.6941441 9.564309 6.14915 -7.593012 8.048197 0.3137321
## ACF1 Theil's U
## Training set -0.001604024 NA
## Test set -0.442237657 0.5946297
################TSLM
print("Territories TSLM models")
## [1] "Territories TSLM models"
accuracy(fcast.terr.4, test.terr[,4])
## ME RMSE MAE MPE MAPE MASE
## Training set -7.68135e-15 31.65667 23.92088 -0.3485376 4.472621 0.7616687
## Test set 2.10440e+01 44.34700 33.14954 3.4609298 5.963205 1.0555200
## ACF1 Theil's U
## Training set -0.04934423 NA
## Test set -0.12678814 0.4523574
accuracy(fcast.terr.5, test.terr[,5])
## ME RMSE MAE MPE MAPE MASE
## Training set -5.762974e-16 13.02349 10.732952 -2.228015 12.54838 1.148632
## Test set -2.724679e+00 10.63808 7.909188 -6.713301 12.02198 0.846435
## ACF1 Theil's U
## Training set -0.1829893 NA
## Test set -0.3429590 0.329316
accuracy(fcast.terr.6, test.terr[,6])
## ME RMSE MAE MPE MAPE MASE
## Training set 0.000000 28.99398 20.89838 -4.404564 18.05104 1.0662440
## Test set -6.253134 13.18546 8.36339 -7.741658 10.48581 0.4267036
## ACF1 Theil's U
## Training set -0.2056187 NA
## Test set -0.5186064 0.9042278
PART 11: Efficiency - Run time with benchmark
(ON.time<-benchmark(arima_ON.m,arima_ON.5.m,arima_ON.6.m,tsr_ON,tsr_ON.5,tsr_ON.6,
columns=c('test', 'elapsed', 'replications'), replications= 1000, order='elapsed'))
## test elapsed replications
## 1 arima_ON.m 0.00 1000
## 2 arima_ON.5.m 0.00 1000
## 3 arima_ON.6.m 0.00 1000
## 4 tsr_ON 0.00 1000
## 6 tsr_ON.6 0.00 1000
## 5 tsr_ON.5 0.01 1000
(QC.time<-benchmark(arima_QC.m,arima_QC.5.m,arima_QC.6.m,tsr_QC,tsr_QC.5,tsr_QC.6,
columns=c('test', 'elapsed', 'replications'), replications= 1000, order='elapsed'))
## test elapsed replications
## 1 arima_QC.m 0.00 1000
## 2 arima_QC.5.m 0.00 1000
## 4 tsr_QC 0.00 1000
## 5 tsr_QC.5 0.00 1000
## 6 tsr_QC.6 0.00 1000
## 3 arima_QC.6.m 0.01 1000
(BC.time<-benchmark(arima_BC.6.m, arima_BC.5.m, arima_BC.m, tsr_BC, tsr_BC.5,tsr_BC.6,
columns=c('test', 'elapsed', 'replications'), replications= 1000, order='elapsed'))
## test elapsed replications
## 1 arima_BC.6.m 0.00 1000
## 2 arima_BC.5.m 0.00 1000
## 4 tsr_BC 0.00 1000
## 5 tsr_BC.5 0.00 1000
## 6 tsr_BC.6 0.00 1000
## 3 arima_BC.m 0.01 1000
(mid.time<-benchmark(arima_mid.m,arima_mid.5.m,arima_mid.6.m,tsr_mid,tsr_mid.5,tsr_mid.6,
columns=c('test', 'elapsed', 'replications'), replications= 1000, order='elapsed'))
## test elapsed replications
## 1 arima_mid.m 0 1000
## 2 arima_mid.5.m 0 1000
## 3 arima_mid.6.m 0 1000
## 4 tsr_mid 0 1000
## 5 tsr_mid.5 0 1000
## 6 tsr_mid.6 0 1000
(mar.time<-benchmark(arima_mar.m,arima_mar.5.m,arima_mar.6.m,tsr_mar,tsr_mar.5,tsr_mar.6,
columns=c('test', 'elapsed', 'replications'), replications= 1000, order='elapsed'))
## test elapsed replications
## 1 arima_mar.m 0 1000
## 2 arima_mar.5.m 0 1000
## 3 arima_mar.6.m 0 1000
## 4 tsr_mar 0 1000
## 5 tsr_mar.5 0 1000
## 6 tsr_mar.6 0 1000
(terr.time<-benchmark(arima_terr.m,arima_terr.5.m,arima_terr.6.m,tsr_terr,tsr_terr.5,tsr_terr.6,
columns=c('test', 'elapsed', 'replications'), replications= 1000, order='elapsed'))
## test elapsed replications
## 1 arima_terr.m 0 1000
## 2 arima_terr.5.m 0 1000
## 3 arima_terr.6.m 0 1000
## 4 tsr_terr 0 1000
## 5 tsr_terr.5 0 1000
## 6 tsr_terr.6 0 1000
Part 12: Stability of ARIMA functions Inverse roots